RDMA/mlx5: Track netdev to avoid deadlock during netdev notifier unregister
When removing a network namespace that contains an mlx5 devlink instance, the following call chain is performed:

cleanup_net (takes down_read(&pernet_ops_rwsem))
devlink_pernet_pre_exit()
devlink_reload()
mlx5_devlink_reload_down()
mlx5_unload_one_devl_locked()
mlx5_detach_device()
del_adev()
mlx5r_remove()
__mlx5_ib_remove()
mlx5_ib_roce_cleanup()
mlx5_remove_netdev_notifier()
unregister_netdevice_notifier (takes down_write(&pernet_ops_rwsem))

This deadlocks.

Resolve this by converting to register_netdevice_notifier_dev_net(), which does not take pernet_ops_rwsem and moves the notifier block around according to the netdev it takes as an argument.

Use the previously introduced netdev added/removed events to track the uplink netdev to be used for register_netdevice_notifier_dev_net() purposes.

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
This commit is contained in:
parent
c7d4e6ab31
commit
dca55da0a1
@ -3012,26 +3012,63 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
|
||||
}
|
||||
}
|
||||
|
||||
static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev, u32 port_num)
|
||||
static void mlx5_netdev_notifier_register(struct mlx5_roce *roce,
|
||||
struct net_device *netdev)
|
||||
{
|
||||
int err;
|
||||
|
||||
dev->port[port_num].roce.nb.notifier_call = mlx5_netdev_event;
|
||||
err = register_netdevice_notifier(&dev->port[port_num].roce.nb);
|
||||
if (err) {
|
||||
dev->port[port_num].roce.nb.notifier_call = NULL;
|
||||
return err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
if (roce->tracking_netdev)
|
||||
return;
|
||||
roce->tracking_netdev = netdev;
|
||||
roce->nb.notifier_call = mlx5_netdev_event;
|
||||
err = register_netdevice_notifier_dev_net(netdev, &roce->nb, &roce->nn);
|
||||
WARN_ON(err);
|
||||
}
|
||||
|
||||
static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev, u32 port_num)
|
||||
static void mlx5_netdev_notifier_unregister(struct mlx5_roce *roce)
|
||||
{
|
||||
if (dev->port[port_num].roce.nb.notifier_call) {
|
||||
unregister_netdevice_notifier(&dev->port[port_num].roce.nb);
|
||||
dev->port[port_num].roce.nb.notifier_call = NULL;
|
||||
if (!roce->tracking_netdev)
|
||||
return;
|
||||
unregister_netdevice_notifier_dev_net(roce->tracking_netdev, &roce->nb,
|
||||
&roce->nn);
|
||||
roce->tracking_netdev = NULL;
|
||||
}
|
||||
|
||||
static int mlx5e_mdev_notifier_event(struct notifier_block *nb,
|
||||
unsigned long event, void *data)
|
||||
{
|
||||
struct mlx5_roce *roce = container_of(nb, struct mlx5_roce, mdev_nb);
|
||||
struct net_device *netdev = data;
|
||||
|
||||
switch (event) {
|
||||
case MLX5_DRIVER_EVENT_UPLINK_NETDEV:
|
||||
if (netdev)
|
||||
mlx5_netdev_notifier_register(roce, netdev);
|
||||
else
|
||||
mlx5_netdev_notifier_unregister(roce);
|
||||
break;
|
||||
default:
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
/*
 * Begin tracking for port @port_num of @dev.
 *
 * Installs mlx5e_mdev_notifier_event() as the callback of the port's
 * mdev_nb, registers that block on dev->mdev's blocking notifier chain and
 * then requests a replay of the uplink netdev event from the core.
 * NOTE(review): the replay presumably lets the callback see an uplink netdev
 * that was set before registration - confirm against the core implementation.
 */
static void mlx5_mdev_netdev_track(struct mlx5_ib_dev *dev, u32 port_num)
{
	struct notifier_block *mdev_nb = &dev->port[port_num].roce.mdev_nb;

	mdev_nb->notifier_call = mlx5e_mdev_notifier_event;
	mlx5_blocking_notifier_register(dev->mdev, mdev_nb);
	mlx5_core_uplink_netdev_event_replay(dev->mdev);
}
|
||||
|
||||
/*
 * Stop tracking for port @port_num of @dev.
 *
 * The port's mdev_nb is removed from dev->mdev's blocking notifier chain
 * first, and only afterwards is the netdev notifier unregistered.
 */
static void mlx5_mdev_netdev_untrack(struct mlx5_ib_dev *dev, u32 port_num)
{
	struct mlx5_roce *port_roce = &dev->port[port_num].roce;

	mlx5_blocking_notifier_unregister(dev->mdev, &port_roce->mdev_nb);
	mlx5_netdev_notifier_unregister(port_roce);
}
|
||||
|
||||
static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
|
||||
@ -3138,7 +3175,7 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
|
||||
if (mpi->mdev_events.notifier_call)
|
||||
mlx5_notifier_unregister(mpi->mdev, &mpi->mdev_events);
|
||||
mpi->mdev_events.notifier_call = NULL;
|
||||
mlx5_remove_netdev_notifier(ibdev, port_num);
|
||||
mlx5_mdev_netdev_untrack(ibdev, port_num);
|
||||
spin_lock(&port->mp.mpi_lock);
|
||||
|
||||
comps = mpi->mdev_refcnt;
|
||||
@ -3196,12 +3233,7 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
|
||||
if (err)
|
||||
goto unbind;
|
||||
|
||||
err = mlx5_add_netdev_notifier(ibdev, port_num);
|
||||
if (err) {
|
||||
mlx5_ib_err(ibdev, "failed adding netdev notifier for port %u\n",
|
||||
port_num + 1);
|
||||
goto unbind;
|
||||
}
|
||||
mlx5_mdev_netdev_track(ibdev, port_num);
|
||||
|
||||
mpi->mdev_events.notifier_call = mlx5_ib_event_slave_port;
|
||||
mlx5_notifier_register(mpi->mdev, &mpi->mdev_events);
|
||||
@ -3909,9 +3941,7 @@ static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev)
|
||||
port_num = mlx5_core_native_port_num(dev->mdev) - 1;
|
||||
|
||||
/* Register only for native ports */
|
||||
err = mlx5_add_netdev_notifier(dev, port_num);
|
||||
if (err)
|
||||
return err;
|
||||
mlx5_mdev_netdev_track(dev, port_num);
|
||||
|
||||
err = mlx5_enable_eth(dev);
|
||||
if (err)
|
||||
@ -3920,7 +3950,7 @@ static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev)
|
||||
|
||||
return 0;
|
||||
cleanup:
|
||||
mlx5_remove_netdev_notifier(dev, port_num);
|
||||
mlx5_mdev_netdev_untrack(dev, port_num);
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -3938,7 +3968,7 @@ static void mlx5_ib_roce_cleanup(struct mlx5_ib_dev *dev)
|
||||
mlx5_disable_eth(dev);
|
||||
|
||||
port_num = mlx5_core_native_port_num(dev->mdev) - 1;
|
||||
mlx5_remove_netdev_notifier(dev, port_num);
|
||||
mlx5_mdev_netdev_untrack(dev, port_num);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -832,6 +832,9 @@ struct mlx5_roce {
|
||||
rwlock_t netdev_lock;
|
||||
struct net_device *netdev;
|
||||
struct notifier_block nb;
|
||||
struct netdev_net_notifier nn;
|
||||
struct notifier_block mdev_nb;
|
||||
struct net_device *tracking_netdev;
|
||||
atomic_t tx_port_affinity;
|
||||
enum ib_port_state last_port_state;
|
||||
struct mlx5_ib_dev *dev;
|
||||
|
@ -424,6 +424,7 @@ int mlx5_blocking_notifier_register(struct mlx5_core_dev *dev, struct notifier_b
|
||||
|
||||
return blocking_notifier_chain_register(&events->sw_nh, nb);
|
||||
}
|
||||
EXPORT_SYMBOL(mlx5_blocking_notifier_register);
|
||||
|
||||
int mlx5_blocking_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
|
||||
{
|
||||
@ -431,6 +432,7 @@ int mlx5_blocking_notifier_unregister(struct mlx5_core_dev *dev, struct notifier
|
||||
|
||||
return blocking_notifier_chain_unregister(&events->sw_nh, nb);
|
||||
}
|
||||
EXPORT_SYMBOL(mlx5_blocking_notifier_unregister);
|
||||
|
||||
int mlx5_blocking_notifier_call_chain(struct mlx5_core_dev *dev, unsigned int event,
|
||||
void *data)
|
||||
|
Loading…
x
Reference in New Issue
Block a user