Mika Westerberg 87d0f2a553 PCI: pciehp: Prevent deadlock on disconnect
This addresses deadlocks in these common cases in hierarchies containing
two switches:

  - All involved ports are runtime suspended and they are unplugged. This
    can happen easily if the drivers involved automatically enable runtime
    PM (xHCI for example does that).

  - System is suspended (e.g., closing the lid on a laptop) with a dock +
    something else connected, and the dock is unplugged while suspended.

These cases lead to the following deadlock:

  INFO: task irq/126-pciehp:198 blocked for more than 120 seconds.
  irq/126-pciehp  D    0   198      2 0x80000000
  Call Trace:
   schedule+0x2c/0x80
   schedule_timeout+0x246/0x350
   wait_for_completion+0xb7/0x140
   kthread_stop+0x49/0x110
   free_irq+0x32/0x70
   pcie_shutdown_notification+0x2f/0x50
   pciehp_remove+0x27/0x50
   pcie_port_remove_service+0x36/0x50
   device_release_driver+0x12/0x20
   bus_remove_device+0xec/0x160
   device_del+0x13b/0x350
   device_unregister+0x1a/0x60
   remove_iter+0x1e/0x30
   device_for_each_child+0x56/0x90
   pcie_port_device_remove+0x22/0x40
   pcie_portdrv_remove+0x20/0x60
   pci_device_remove+0x3e/0xc0
   device_release_driver_internal+0x18c/0x250
   device_release_driver+0x12/0x20
   pci_stop_bus_device+0x6f/0x90
   pci_stop_bus_device+0x31/0x90
   pci_stop_and_remove_bus_device+0x12/0x20
   pciehp_unconfigure_device+0x88/0x140
   pciehp_disable_slot+0x6a/0x110
   pciehp_handle_presence_or_link_change+0x263/0x400
   pciehp_ist+0x1c9/0x1d0
   irq_thread_fn+0x24/0x60
   irq_thread+0xeb/0x190
   kthread+0x120/0x140

  INFO: task irq/190-pciehp:2288 blocked for more than 120 seconds.
  irq/190-pciehp  D    0  2288      2 0x80000000
  Call Trace:
   __schedule+0x2a2/0x880
   schedule+0x2c/0x80
   schedule_preempt_disabled+0xe/0x10
   mutex_lock+0x2c/0x30
   pci_lock_rescan_remove+0x15/0x20
   pciehp_unconfigure_device+0x4d/0x140
   pciehp_disable_slot+0x6a/0x110
   pciehp_handle_presence_or_link_change+0x263/0x400
   pciehp_ist+0x1c9/0x1d0
   irq_thread_fn+0x24/0x60
   irq_thread+0xeb/0x190
   kthread+0x120/0x140

What happens here is that the whole hierarchy is runtime resumed and the
parent PCIe downstream port, which got the hot-remove event, starts
removing devices below it, taking pci_lock_rescan_remove() lock. When the
child PCIe port is runtime resumed it calls pciehp_check_presence() which
ends up calling pciehp_card_present() and pciehp_check_link_active().  Both
of these use pcie_capability_read_word(), which notices that the underlying
device is already gone and returns PCIBIOS_DEVICE_NOT_FOUND with the
capability value set to 0. When pciehp gets this value it thinks that its
child device is also hot-removed and schedules its IRQ thread to handle the
event.

The deadlock happens when the child's IRQ thread runs and tries to acquire
pci_lock_rescan_remove() which is already taken by the parent and the
parent waits for the child's IRQ thread to finish.

Prevent this from happening by checking the return value of
pcie_capability_read_word() and if it is PCIBIOS_DEVICE_NOT_FOUND stop
performing any hot-removal activities.

[bhelgaas: add common scenarios to commit log]
Link: https://lore.kernel.org/r/20191029170022.57528-2-mika.westerberg@linux.intel.com
Tested-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2019-11-12 17:17:42 -06:00

201 lines
8.0 KiB
C

/* SPDX-License-Identifier: GPL-2.0+ */
/*
* PCI Express Hot Plug Controller Driver
*
* Copyright (C) 1995,2001 Compaq Computer Corporation
* Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com)
* Copyright (C) 2001 IBM Corp.
* Copyright (C) 2003-2004 Intel Corporation
*
* All rights reserved.
*
* Send feedback to <greg@kroah.com>, <kristen.c.accardi@intel.com>
*
*/
#ifndef _PCIEHP_H
#define _PCIEHP_H
#include <linux/types.h>
#include <linux/pci.h>
#include <linux/pci_hotplug.h>
#include <linux/delay.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/workqueue.h>
#include "../pcie/portdrv.h"
extern bool pciehp_poll_mode;
extern int pciehp_poll_time;
/*
* Set CONFIG_DYNAMIC_DEBUG=y and boot with 'dyndbg="file pciehp* +p"' to
* enable debug messages.
*/
#define ctrl_dbg(ctrl, format, arg...) \
pci_dbg(ctrl->pcie->port, format, ## arg)
#define ctrl_err(ctrl, format, arg...) \
pci_err(ctrl->pcie->port, format, ## arg)
#define ctrl_info(ctrl, format, arg...) \
pci_info(ctrl->pcie->port, format, ## arg)
#define ctrl_warn(ctrl, format, arg...) \
pci_warn(ctrl->pcie->port, format, ## arg)
#define SLOT_NAME_SIZE 10
/**
* struct controller - PCIe hotplug controller
* @pcie: pointer to the controller's PCIe port service device
* @slot_cap: cached copy of the Slot Capabilities register
* @slot_ctrl: cached copy of the Slot Control register
* @ctrl_lock: serializes writes to the Slot Control register
* @cmd_started: jiffies when the Slot Control register was last written;
* the next write is allowed 1 second later, absent a Command Completed
* interrupt (PCIe r4.0, sec 6.7.3.2)
* @cmd_busy: flag set on Slot Control register write, cleared by IRQ handler
* on reception of a Command Completed event
* @queue: wait queue to wake up on reception of a Command Completed event,
* used for synchronous writes to the Slot Control register
* @pending_events: used by the IRQ handler to save events retrieved from the
* Slot Status register for later consumption by the IRQ thread
* @notification_enabled: whether the IRQ was requested successfully
* @power_fault_detected: whether a power fault was detected by the hardware
* that has not yet been cleared by the user
* @poll_thread: thread to poll for slot events if no IRQ is available,
* enabled with pciehp_poll_mode module parameter
* @state: current state machine position
* @state_lock: protects reads and writes of @state;
* protects scheduling, execution and cancellation of @button_work
* @button_work: work item to turn the slot on or off after 5 seconds
* in response to an Attention Button press
* @hotplug_slot: structure registered with the PCI hotplug core
* @reset_lock: prevents access to the Data Link Layer Link Active bit in the
* Link Status register and to the Presence Detect State bit in the Slot
* Status register during a slot reset which may cause them to flap
* @ist_running: flag to keep user request waiting while IRQ thread is running
* @request_result: result of last user request submitted to the IRQ thread
* @requester: wait queue to wake up on completion of user request,
* used for synchronous slot enable/disable request via sysfs
*
* PCIe hotplug has a 1:1 relationship between controller and slot, hence
* unlike other drivers, the two aren't represented by separate structures.
*/
struct controller {
struct pcie_device *pcie;
u32 slot_cap; /* capabilities and quirks */
u16 slot_ctrl; /* control register access */
struct mutex ctrl_lock;
unsigned long cmd_started;
unsigned int cmd_busy:1;
wait_queue_head_t queue;
atomic_t pending_events; /* event handling */
unsigned int notification_enabled:1;
unsigned int power_fault_detected;
struct task_struct *poll_thread;
u8 state; /* state machine */
struct mutex state_lock;
struct delayed_work button_work;
struct hotplug_slot hotplug_slot; /* hotplug core interface */
struct rw_semaphore reset_lock;
unsigned int ist_running;
int request_result;
wait_queue_head_t requester;
};
/**
* DOC: Slot state
*
* @OFF_STATE: slot is powered off, no subordinate devices are enumerated
* @BLINKINGON_STATE: slot will be powered on after the 5 second delay,
* Power Indicator is blinking
* @BLINKINGOFF_STATE: slot will be powered off after the 5 second delay,
* Power Indicator is blinking
* @POWERON_STATE: slot is currently powering on
* @POWEROFF_STATE: slot is currently powering off
* @ON_STATE: slot is powered on, subordinate devices have been enumerated
*/
#define OFF_STATE 0
#define BLINKINGON_STATE 1
#define BLINKINGOFF_STATE 2
#define POWERON_STATE 3
#define POWEROFF_STATE 4
#define ON_STATE 5
/**
* DOC: Flags to request an action from the IRQ thread
*
* These are stored together with events read from the Slot Status register,
* hence must be greater than its 16-bit width.
*
* %DISABLE_SLOT: Disable the slot in response to a user request via sysfs or
* an Attention Button press after the 5 second delay
* %RERUN_ISR: Used by the IRQ handler to inform the IRQ thread that the
* hotplug port was inaccessible when the interrupt occurred, requiring
* that the IRQ handler is rerun by the IRQ thread after it has made the
* hotplug port accessible by runtime resuming its parents to D0
*/
#define DISABLE_SLOT (1 << 16)
#define RERUN_ISR (1 << 17)
#define ATTN_BUTTN(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_ABP)
#define POWER_CTRL(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_PCP)
#define MRL_SENS(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_MRLSP)
#define ATTN_LED(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_AIP)
#define PWR_LED(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_PIP)
#define HP_SUPR_RM(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_HPS)
#define EMI(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_EIP)
#define NO_CMD_CMPL(ctrl) ((ctrl)->slot_cap & PCI_EXP_SLTCAP_NCCS)
#define PSN(ctrl) (((ctrl)->slot_cap & PCI_EXP_SLTCAP_PSN) >> 19)
void pciehp_request(struct controller *ctrl, int action);
void pciehp_handle_button_press(struct controller *ctrl);
void pciehp_handle_disable_request(struct controller *ctrl);
void pciehp_handle_presence_or_link_change(struct controller *ctrl, u32 events);
int pciehp_configure_device(struct controller *ctrl);
void pciehp_unconfigure_device(struct controller *ctrl, bool presence);
void pciehp_queue_pushbutton_work(struct work_struct *work);
struct controller *pcie_init(struct pcie_device *dev);
int pcie_init_notification(struct controller *ctrl);
void pcie_shutdown_notification(struct controller *ctrl);
void pcie_clear_hotplug_events(struct controller *ctrl);
void pcie_enable_interrupt(struct controller *ctrl);
void pcie_disable_interrupt(struct controller *ctrl);
int pciehp_power_on_slot(struct controller *ctrl);
void pciehp_power_off_slot(struct controller *ctrl);
void pciehp_get_power_status(struct controller *ctrl, u8 *status);
#define INDICATOR_NOOP -1 /* Leave indicator unchanged */
void pciehp_set_indicators(struct controller *ctrl, int pwr, int attn);
void pciehp_get_latch_status(struct controller *ctrl, u8 *status);
int pciehp_query_power_fault(struct controller *ctrl);
int pciehp_card_present(struct controller *ctrl);
int pciehp_card_present_or_link_active(struct controller *ctrl);
int pciehp_check_link_status(struct controller *ctrl);
int pciehp_check_link_active(struct controller *ctrl);
void pciehp_release_ctrl(struct controller *ctrl);
int pciehp_sysfs_enable_slot(struct hotplug_slot *hotplug_slot);
int pciehp_sysfs_disable_slot(struct hotplug_slot *hotplug_slot);
int pciehp_reset_slot(struct hotplug_slot *hotplug_slot, int probe);
int pciehp_get_attention_status(struct hotplug_slot *hotplug_slot, u8 *status);
int pciehp_set_raw_indicator_status(struct hotplug_slot *h_slot, u8 status);
int pciehp_get_raw_indicator_status(struct hotplug_slot *h_slot, u8 *status);
static inline const char *slot_name(struct controller *ctrl)
{
return hotplug_slot_name(&ctrl->hotplug_slot);
}
static inline struct controller *to_ctrl(struct hotplug_slot *hotplug_slot)
{
return container_of(hotplug_slot, struct controller, hotplug_slot);
}
#endif /* _PCIEHP_H */