chelsio: cxgb: Disable the card on error in threaded interrupt

t1_fatal_err() is invoked from the interrupt handler. The bad part is
that it invokes (via t1_sge_stop()) del_timer_sync() and tasklet_kill().
Both functions must not be called from an interrupt because it is
possible that it will wait for the completion of the timer/tasklet it
just interrupted.

In case of a fatal error, use t1_interrupts_disable() to disable all
interrupt sources and then wake the interrupt thread with
F_PL_INTR_SGE_ERR as pending flag. The threaded-interrupt will stop the
card via t1_sge_stop() and not re-enable the interrupts again.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Sebastian Andrzej Siewior 2021-02-02 18:01:04 +01:00 committed by Jakub Kicinski
parent fec7fa0a75
commit 82154580a7
5 changed files with 44 additions and 27 deletions

View File

@ -346,7 +346,6 @@ int t1_get_board_rev(adapter_t *adapter, const struct board_info *bi,
int t1_init_hw_modules(adapter_t *adapter);
int t1_init_sw_modules(adapter_t *adapter, const struct board_info *bi);
void t1_free_sw_modules(adapter_t *adapter);
void t1_fatal_err(adapter_t *adapter);
void t1_link_changed(adapter_t *adapter, int port_id);
void t1_link_negotiated(adapter_t *adapter, int port_id, int link_stat,
int speed, int duplex, int pause);

View File

@ -917,16 +917,6 @@ static void mac_stats_task(struct work_struct *work)
spin_unlock(&adapter->work_lock);
}
void t1_fatal_err(struct adapter *adapter)
{
if (adapter->flags & FULL_INIT_DONE) {
t1_sge_stop(adapter->sge);
t1_interrupts_disable(adapter);
}
pr_alert("%s: encountered fatal error, operation suspended\n",
adapter->name);
}
static const struct net_device_ops cxgb_netdev_ops = {
.ndo_open = cxgb_open,
.ndo_stop = cxgb_close,

View File

@ -940,10 +940,11 @@ void t1_sge_intr_clear(struct sge *sge)
/*
* SGE 'Error' interrupt handler
*/
int t1_sge_intr_error_handler(struct sge *sge)
bool t1_sge_intr_error_handler(struct sge *sge)
{
struct adapter *adapter = sge->adapter;
u32 cause = readl(adapter->regs + A_SG_INT_CAUSE);
bool wake = false;
if (adapter->port[0].dev->hw_features & NETIF_F_TSO)
cause &= ~F_PACKET_TOO_BIG;
@ -967,11 +968,14 @@ int t1_sge_intr_error_handler(struct sge *sge)
sge->stats.pkt_mismatch++;
pr_alert("%s: SGE packet mismatch\n", adapter->name);
}
if (cause & SGE_INT_FATAL)
t1_fatal_err(adapter);
if (cause & SGE_INT_FATAL) {
t1_interrupts_disable(adapter);
adapter->pending_thread_intr |= F_PL_INTR_SGE_ERR;
wake = true;
}
writel(cause, adapter->regs + A_SG_INT_CAUSE);
return 0;
return wake;
}
const struct sge_intr_counts *t1_sge_get_intr_counts(const struct sge *sge)
@ -1635,6 +1639,14 @@ irqreturn_t t1_interrupt_thread(int irq, void *data)
if (pending_thread_intr & F_PL_INTR_EXT)
t1_elmer0_ext_intr_handler(adapter);
/* This error is fatal, interrupts remain off */
if (pending_thread_intr & F_PL_INTR_SGE_ERR) {
pr_alert("%s: encountered fatal error, operation suspended\n",
adapter->name);
t1_sge_stop(adapter->sge);
return IRQ_HANDLED;
}
spin_lock_irq(&adapter->async_lock);
adapter->slow_intr_mask |= F_PL_INTR_EXT;

View File

@ -82,7 +82,7 @@ netdev_tx_t t1_start_xmit(struct sk_buff *skb, struct net_device *dev);
void t1_vlan_mode(struct adapter *adapter, netdev_features_t features);
void t1_sge_start(struct sge *);
void t1_sge_stop(struct sge *);
int t1_sge_intr_error_handler(struct sge *);
bool t1_sge_intr_error_handler(struct sge *sge);
void t1_sge_intr_enable(struct sge *);
void t1_sge_intr_disable(struct sge *);
void t1_sge_intr_clear(struct sge *);

View File

@ -170,7 +170,7 @@ void t1_link_changed(adapter_t *adapter, int port_id)
t1_link_negotiated(adapter, port_id, link_ok, speed, duplex, fc);
}
static int t1_pci_intr_handler(adapter_t *adapter)
static bool t1_pci_intr_handler(adapter_t *adapter)
{
u32 pcix_cause;
@ -179,9 +179,13 @@ static int t1_pci_intr_handler(adapter_t *adapter)
if (pcix_cause) {
pci_write_config_dword(adapter->pdev, A_PCICFG_INTR_CAUSE,
pcix_cause);
t1_fatal_err(adapter); /* PCI errors are fatal */
/* PCI errors are fatal */
t1_interrupts_disable(adapter);
adapter->pending_thread_intr |= F_PL_INTR_SGE_ERR;
pr_alert("%s: PCI error encountered.\n", adapter->name);
return true;
}
return 0;
return false;
}
#ifdef CONFIG_CHELSIO_T1_1G
@ -213,10 +217,13 @@ static int fpga_phy_intr_handler(adapter_t *adapter)
static irqreturn_t fpga_slow_intr(adapter_t *adapter)
{
u32 cause = readl(adapter->regs + A_PL_CAUSE);
irqreturn_t ret = IRQ_NONE;
cause &= ~F_PL_INTR_SGE_DATA;
if (cause & F_PL_INTR_SGE_ERR)
t1_sge_intr_error_handler(adapter->sge);
if (cause & F_PL_INTR_SGE_ERR) {
if (t1_sge_intr_error_handler(adapter->sge))
ret = IRQ_WAKE_THREAD;
}
if (cause & FPGA_PCIX_INTERRUPT_GMAC)
fpga_phy_intr_handler(adapter);
@ -231,13 +238,18 @@ static irqreturn_t fpga_slow_intr(adapter_t *adapter)
/* Clear TP interrupt */
writel(tp_cause, adapter->regs + FPGA_TP_ADDR_INTERRUPT_CAUSE);
}
if (cause & FPGA_PCIX_INTERRUPT_PCIX)
t1_pci_intr_handler(adapter);
if (cause & FPGA_PCIX_INTERRUPT_PCIX) {
if (t1_pci_intr_handler(adapter))
ret = IRQ_WAKE_THREAD;
}
/* Clear the interrupts just processed. */
if (cause)
writel(cause, adapter->regs + A_PL_CAUSE);
if (ret != IRQ_NONE)
return ret;
return cause == 0 ? IRQ_NONE : IRQ_HANDLED;
}
#endif
@ -850,14 +862,18 @@ static irqreturn_t asic_slow_intr(adapter_t *adapter)
cause &= adapter->slow_intr_mask;
if (!cause)
return IRQ_NONE;
if (cause & F_PL_INTR_SGE_ERR)
t1_sge_intr_error_handler(adapter->sge);
if (cause & F_PL_INTR_SGE_ERR) {
if (t1_sge_intr_error_handler(adapter->sge))
ret = IRQ_WAKE_THREAD;
}
if (cause & F_PL_INTR_TP)
t1_tp_intr_handler(adapter->tp);
if (cause & F_PL_INTR_ESPI)
t1_espi_intr_handler(adapter->espi);
if (cause & F_PL_INTR_PCIX)
t1_pci_intr_handler(adapter);
if (cause & F_PL_INTR_PCIX) {
if (t1_pci_intr_handler(adapter))
ret = IRQ_WAKE_THREAD;
}
if (cause & F_PL_INTR_EXT) {
/* Wake the threaded interrupt to handle external interrupts as
* we require a process context. We disable EXT interrupts in