From 8da77df649c43d9cec70d683ee317ba5d49d09b7 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 14 Sep 2023 16:35:31 +0100 Subject: [PATCH 1/7] net: phy: always call phy_process_state_change() under lock phy_stop() calls phy_process_state_change() while holding the phydev lock, so also arrange for phy_state_machine() to do the same, so that this function is called with consistent locking. Tested-by: Jijie Shao Signed-off-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index df54c137c5f5..1e5218935eb3 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1506,6 +1506,7 @@ void phy_state_machine(struct work_struct *work) if (err < 0) phy_error_precise(phydev, func, err); + mutex_lock(&phydev->lock); phy_process_state_change(phydev, old_state); /* Only re-schedule a PHY state machine change if we are polling the @@ -1516,7 +1517,6 @@ void phy_state_machine(struct work_struct *work) * state machine would be pointless and possibly error prone when * called from phy_disconnect() synchronously. */ - mutex_lock(&phydev->lock); if (phy_polling_mode(phydev) && phy_is_started(phydev)) phy_queue_state_machine(phydev, PHY_STATE_TIME); mutex_unlock(&phydev->lock); From ef113a60d0a901c28c1b844bf967c180528155c5 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 14 Sep 2023 16:35:36 +0100 Subject: [PATCH 2/7] net: phy: call phy_error_precise() while holding the lock Move the locking out of phy_error_precise() and to its only call site, merging with the locked region that has already been taken. Tested-by: Jijie Shao Signed-off-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 1e5218935eb3..990d387b31bd 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1231,9 +1231,7 @@ static void phy_error_precise(struct phy_device *phydev, const void *func, int err) { WARN(1, "%pS: returned: %d\n", func, err); - mutex_lock(&phydev->lock); phy_process_error(phydev); - mutex_unlock(&phydev->lock); } /** @@ -1503,10 +1501,10 @@ void phy_state_machine(struct work_struct *work) if (err == -ENODEV) return; + mutex_lock(&phydev->lock); if (err < 0) phy_error_precise(phydev, func, err); - mutex_lock(&phydev->lock); phy_process_state_change(phydev, old_state); /* Only re-schedule a PHY state machine change if we are polling the From ea5968cd7d6e6f237cfc6d00aa29ff1dc9230b2f Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 14 Sep 2023 16:35:41 +0100 Subject: [PATCH 3/7] net: phy: move call to start aneg Move the call to start auto-negotiation inside the lock in the PHYLIB state machine, calling the locked variant _phy_start_aneg(). This avoids unnecessarily releasing and re-acquiring the lock. Tested-by: Jijie Shao Signed-off-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 990d387b31bd..5bb33af2a4cb 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1489,14 +1489,15 @@ void phy_state_machine(struct work_struct *work) break; } + if (needs_aneg) { + err = _phy_start_aneg(phydev); + func = &_phy_start_aneg; + } + mutex_unlock(&phydev->lock); - if (needs_aneg) { - err = phy_start_aneg(phydev); - func = &phy_start_aneg; - } else if (do_suspend) { + if (do_suspend) phy_suspend(phydev); - } if (err == -ENODEV) return; From 6e19b3502c59ad21aa405383dfd0bdf4e841de57 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 14 Sep 2023 16:35:47 +0100 Subject: [PATCH 4/7] net: phy: move phy_suspend() to end of phy_state_machine() Move the call to phy_suspend() to the end of phy_state_machine() after we release the lock so that we can combine the locked areas. phy_suspend() can not be called while holding phydev->lock as it has caused deadlocks in the past. Tested-by: Jijie Shao Signed-off-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 5bb33af2a4cb..756326f38b14 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1494,15 +1494,11 @@ void phy_state_machine(struct work_struct *work) func = &_phy_start_aneg; } - mutex_unlock(&phydev->lock); - - if (do_suspend) - phy_suspend(phydev); - - if (err == -ENODEV) + if (err == -ENODEV) { + mutex_unlock(&phydev->lock); return; + } - mutex_lock(&phydev->lock); if (err < 0) phy_error_precise(phydev, func, err); @@ -1519,6 +1515,9 @@ void phy_state_machine(struct work_struct *work) if (phy_polling_mode(phydev) && phy_is_started(phydev)) phy_queue_state_machine(phydev, PHY_STATE_TIME); mutex_unlock(&phydev->lock); + + if (do_suspend) + phy_suspend(phydev); } /** From c398ef41b6d48bd204260b136b4517e0f130892a Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 14 Sep 2023 16:35:52 +0100 Subject: [PATCH 5/7] net: phy: move phy_state_machine() Move phy_state_machine() before phy_stop() to avoid subsequent patches introducing forward references. Tested-by: Jijie Shao Signed-off-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 152 +++++++++++++++++++++--------------------- 1 file changed, 76 insertions(+), 76 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 756326f38b14..20e23fa9cf96 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1353,82 +1353,6 @@ void phy_free_interrupt(struct phy_device *phydev) } EXPORT_SYMBOL(phy_free_interrupt); -/** - * phy_stop - Bring down the PHY link, and stop checking the status - * @phydev: target phy_device struct - */ -void phy_stop(struct phy_device *phydev) -{ - struct net_device *dev = phydev->attached_dev; - enum phy_state old_state; - - if (!phy_is_started(phydev) && phydev->state != PHY_DOWN && - phydev->state != PHY_ERROR) { - WARN(1, "called from state %s\n", - phy_state_to_str(phydev->state)); - return; - } - - mutex_lock(&phydev->lock); - old_state = phydev->state; - - if (phydev->state == PHY_CABLETEST) { - phy_abort_cable_test(phydev); - netif_testing_off(dev); - } - - if (phydev->sfp_bus) - sfp_upstream_stop(phydev->sfp_bus); - - phydev->state = PHY_HALTED; - phy_process_state_change(phydev, old_state); - - mutex_unlock(&phydev->lock); - - phy_state_machine(&phydev->state_queue.work); - phy_stop_machine(phydev); - - /* Cannot call flush_scheduled_work() here as desired because - * of rtnl_lock(), but PHY_HALTED shall guarantee irq handler - * will not reenable interrupts. - */ -} -EXPORT_SYMBOL(phy_stop); - -/** - * phy_start - start or restart a PHY device - * @phydev: target phy_device struct - * - * Description: Indicates the attached device's readiness to - * handle PHY-related work. Used during startup to start the - * PHY, and after a call to phy_stop() to resume operation. - * Also used to indicate the MDIO bus has cleared an error - * condition. - */ -void phy_start(struct phy_device *phydev) -{ - mutex_lock(&phydev->lock); - - if (phydev->state != PHY_READY && phydev->state != PHY_HALTED) { - WARN(1, "called from state %s\n", - phy_state_to_str(phydev->state)); - goto out; - } - - if (phydev->sfp_bus) - sfp_upstream_start(phydev->sfp_bus); - - /* if phy was suspended, bring the physical link up again */ - __phy_resume(phydev); - - phydev->state = PHY_UP; - - phy_start_machine(phydev); -out: - mutex_unlock(&phydev->lock); -} -EXPORT_SYMBOL(phy_start); - /** * phy_state_machine - Handle the state machine * @work: work_struct that describes the work to be done @@ -1520,6 +1444,82 @@ void phy_state_machine(struct work_struct *work) phy_suspend(phydev); } +/** + * phy_stop - Bring down the PHY link, and stop checking the status + * @phydev: target phy_device struct + */ +void phy_stop(struct phy_device *phydev) +{ + struct net_device *dev = phydev->attached_dev; + enum phy_state old_state; + + if (!phy_is_started(phydev) && phydev->state != PHY_DOWN && + phydev->state != PHY_ERROR) { + WARN(1, "called from state %s\n", + phy_state_to_str(phydev->state)); + return; + } + + mutex_lock(&phydev->lock); + old_state = phydev->state; + + if (phydev->state == PHY_CABLETEST) { + phy_abort_cable_test(phydev); + netif_testing_off(dev); + } + + if (phydev->sfp_bus) + sfp_upstream_stop(phydev->sfp_bus); + + phydev->state = PHY_HALTED; + phy_process_state_change(phydev, old_state); + + mutex_unlock(&phydev->lock); + + phy_state_machine(&phydev->state_queue.work); + phy_stop_machine(phydev); + + /* Cannot call flush_scheduled_work() here as desired because + * of rtnl_lock(), but PHY_HALTED shall guarantee irq handler + * will not reenable interrupts. + */ +} +EXPORT_SYMBOL(phy_stop); + +/** + * phy_start - start or restart a PHY device + * @phydev: target phy_device struct + * + * Description: Indicates the attached device's readiness to + * handle PHY-related work. Used during startup to start the + * PHY, and after a call to phy_stop() to resume operation. + * Also used to indicate the MDIO bus has cleared an error + * condition. + */ +void phy_start(struct phy_device *phydev) +{ + mutex_lock(&phydev->lock); + + if (phydev->state != PHY_READY && phydev->state != PHY_HALTED) { + WARN(1, "called from state %s\n", + phy_state_to_str(phydev->state)); + goto out; + } + + if (phydev->sfp_bus) + sfp_upstream_start(phydev->sfp_bus); + + /* if phy was suspended, bring the physical link up again */ + __phy_resume(phydev); + + phydev->state = PHY_UP; + + phy_start_machine(phydev); +out: + mutex_unlock(&phydev->lock); +} +EXPORT_SYMBOL(phy_start); + /** * phy_mac_interrupt - MAC says the link has changed * @phydev: phy_device struct with changed link From 8635c0663e6bd8b4a11c0cf33d3b067db67ea819 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 14 Sep 2023 16:35:57 +0100 Subject: [PATCH 6/7] net: phy: split locked and unlocked section of phy_state_machine() Split out the locked and unlocked sections of phy_state_machine() into two separate functions which can be called inside the phydev lock and outside the phydev lock as appropriate, thus allowing us to combine the locked regions in the caller of phy_state_machine() with the locked region inside phy_state_machine(). This avoids unnecessarily dropping the phydev lock which may allow races to occur. Tested-by: Jijie Shao Signed-off-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 68 ++++++++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 26 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 20e23fa9cf96..d78c2cc003ce 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1353,33 +1353,27 @@ void phy_free_interrupt(struct phy_device *phydev) } EXPORT_SYMBOL(phy_free_interrupt); -/** - * phy_state_machine - Handle the state machine - * @work: work_struct that describes the work to be done - */ -void phy_state_machine(struct work_struct *work) +enum phy_state_work { + PHY_STATE_WORK_NONE, + PHY_STATE_WORK_ANEG, + PHY_STATE_WORK_SUSPEND, +}; + +static enum phy_state_work _phy_state_machine(struct phy_device *phydev) { - struct delayed_work *dwork = to_delayed_work(work); - struct phy_device *phydev = - container_of(dwork, struct phy_device, state_queue); + enum phy_state_work state_work = PHY_STATE_WORK_NONE; struct net_device *dev = phydev->attached_dev; - bool needs_aneg = false, do_suspend = false; - enum phy_state old_state; + enum phy_state old_state = phydev->state; const void *func = NULL; bool finished = false; int err = 0; - mutex_lock(&phydev->lock); - - old_state = phydev->state; - switch (phydev->state) { case PHY_DOWN: case PHY_READY: break; case PHY_UP: - needs_aneg = true; - + state_work = PHY_STATE_WORK_ANEG; break; case PHY_NOLINK: case PHY_RUNNING: @@ -1391,7 +1385,7 @@ void phy_state_machine(struct work_struct *work) if (err) { phy_abort_cable_test(phydev); netif_testing_off(dev); - needs_aneg = true; + state_work = PHY_STATE_WORK_ANEG; phydev->state = PHY_UP; break; } @@ -1399,7 +1393,7 @@ void phy_state_machine(struct work_struct *work) if (finished) { ethnl_cable_test_finished(phydev); netif_testing_off(dev); - needs_aneg = true; + state_work = PHY_STATE_WORK_ANEG; phydev->state = PHY_UP; } break; @@ -1409,19 +1403,17 @@ void phy_state_machine(struct work_struct *work) phydev->link = 0; phy_link_down(phydev); } - do_suspend = true; + state_work = PHY_STATE_WORK_SUSPEND; break; } - if (needs_aneg) { + if (state_work == PHY_STATE_WORK_ANEG) { err = _phy_start_aneg(phydev); func = &_phy_start_aneg; } - if (err == -ENODEV) { - mutex_unlock(&phydev->lock); - return; - } + if (err == -ENODEV) + return state_work; if (err < 0) phy_error_precise(phydev, func, err); @@ -1438,10 +1430,34 @@ void phy_state_machine(struct work_struct *work) */ if (phy_polling_mode(phydev) && phy_is_started(phydev)) phy_queue_state_machine(phydev, PHY_STATE_TIME); + + return state_work; +} + +/* unlocked part of the PHY state machine */ +static void _phy_state_machine_post_work(struct phy_device *phydev, + enum phy_state_work state_work) +{ + if (state_work == PHY_STATE_WORK_SUSPEND) + phy_suspend(phydev); +} + +/** + * phy_state_machine - Handle the state machine + * @work: work_struct that describes the work to be done + */ +void phy_state_machine(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct phy_device *phydev = + container_of(dwork, struct phy_device, state_queue); + enum phy_state_work state_work; + + mutex_lock(&phydev->lock); + state_work = _phy_state_machine(phydev); mutex_unlock(&phydev->lock); - if (do_suspend) - phy_suspend(phydev); + _phy_state_machine_post_work(phydev, state_work); } /** From adcbb85508c8bb181704154eec1dfe20554ddfe9 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 14 Sep 2023 16:36:02 +0100 Subject: [PATCH 7/7] net: phy: convert phy_stop() to use split state machine Convert phy_stop() to use the new locked-section and unlocked-section parts of the PHY state machine. Tested-by: Jijie Shao Signed-off-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index d78c2cc003ce..93a8676dd8d8 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1467,6 +1467,7 @@ void phy_state_machine(struct work_struct *work) void phy_stop(struct phy_device *phydev) { struct net_device *dev = phydev->attached_dev; + enum phy_state_work state_work; enum phy_state old_state; if (!phy_is_started(phydev) && phydev->state != PHY_DOWN && @@ -1490,9 +1491,10 @@ void phy_stop(struct phy_device *phydev) phydev->state = PHY_HALTED; phy_process_state_change(phydev, old_state); + state_work = _phy_state_machine(phydev); mutex_unlock(&phydev->lock); - phy_state_machine(&phydev->state_queue.work); + _phy_state_machine_post_work(phydev, state_work); phy_stop_machine(phydev); /* Cannot call flush_scheduled_work() here as desired because