diff --git a/drivers/staging/lustre/include/linux/lnet/lib-types.h b/drivers/staging/lustre/include/linux/lnet/lib-types.h index e4a8f6e4e4b1..06d46568b7b5 100644 --- a/drivers/staging/lustre/include/linux/lnet/lib-types.h +++ b/drivers/staging/lustre/include/linux/lnet/lib-types.h @@ -635,6 +635,13 @@ typedef struct { */ bool ln_nis_from_mod_params; + /* + * waitq for router checker. As long as there are no routes in + * the list, the router checker will sleep on this queue. When + * routes are added the thread will wake up. + */ + wait_queue_head_t ln_rc_waitq; + } lnet_t; #endif diff --git a/drivers/staging/lustre/lnet/lnet/api-ni.c b/drivers/staging/lustre/lnet/lnet/api-ni.c index ed121a869205..0ec656ac0fd4 100644 --- a/drivers/staging/lustre/lnet/lnet/api-ni.c +++ b/drivers/staging/lustre/lnet/lnet/api-ni.c @@ -98,6 +98,7 @@ lnet_init_locks(void) { spin_lock_init(&the_lnet.ln_eq_wait_lock); init_waitqueue_head(&the_lnet.ln_eq_waitq); + init_waitqueue_head(&the_lnet.ln_rc_waitq); mutex_init(&the_lnet.ln_lnd_mutex); mutex_init(&the_lnet.ln_api_mutex); } diff --git a/drivers/staging/lustre/lnet/lnet/router.c b/drivers/staging/lustre/lnet/lnet/router.c index 511e44602694..ad9cd4489716 100644 --- a/drivers/staging/lustre/lnet/lnet/router.c +++ b/drivers/staging/lustre/lnet/lnet/router.c @@ -405,6 +405,9 @@ lnet_add_route(__u32 net, unsigned int hops, lnet_nid_t gateway, if (rnet != rnet2) LIBCFS_FREE(rnet, sizeof(*rnet)); + /* wake the router checker in case it is sleeping with no routes */ + wake_up(&the_lnet.ln_rc_waitq); + return rc; } @@ -1056,11 +1059,6 @@ lnet_router_checker_start(void) return -EINVAL; } - if (!the_lnet.ln_routing && - live_router_check_interval <= 0 && - dead_router_check_interval <= 0) - return 0; - sema_init(&the_lnet.ln_rc_signal, 0); /* * EQ size doesn't matter; the callback is guaranteed to get every @@ -1109,6 +1107,8 @@ lnet_router_checker_stop(void) LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_RUNNING); the_lnet.ln_rc_state = 
LNET_RC_STATE_STOPPING; + /* wake up the RC thread if it's sleeping */ + wake_up(&the_lnet.ln_rc_waitq); /* block until event callback signals exit */ down(&the_lnet.ln_rc_signal); @@ -1199,6 +1199,33 @@ lnet_prune_rc_data(int wait_unlink) lnet_net_unlock(LNET_LOCK_EX); } +/* + * Returns true if the RC must not block indefinitely. + * It's called from lnet_router_checker() as well as being passed to + * wait_event_interruptible() to avoid the lost wake_up problem. + * + * When it's called from wait_event_interruptible() it is necessary to + * also not sleep if the rc state is not running to avoid a deadlock + * when the system is shutting down. + */ +static inline bool +lnet_router_checker_active(void) +{ + if (the_lnet.ln_rc_state != LNET_RC_STATE_RUNNING) + return true; + + /* + * Router Checker thread needs to run when routing is enabled in + * order to call lnet_update_ni_status_locked() + */ + if (the_lnet.ln_routing) + return true; + + return !list_empty(&the_lnet.ln_routers) && + (live_router_check_interval > 0 || + dead_router_check_interval > 0); +} + static int lnet_router_checker(void *arg) { @@ -1252,8 +1279,18 @@ rescan: * because kernel counts # active tasks as nr_running * + nr_uninterruptible. */ - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(cfs_time_seconds(1)); + /* + * if the router checker has work to do, wake up every + * second. Otherwise sleep indefinitely until woken up by + * a route being added or by the checker being stopped + */ + if (!lnet_router_checker_active()) + wait_event_interruptible(the_lnet.ln_rc_waitq, + lnet_router_checker_active()); + else + wait_event_interruptible_timeout(the_lnet.ln_rc_waitq, + false, + cfs_time_seconds(1)); } LASSERT(the_lnet.ln_rc_state == LNET_RC_STATE_STOPPING);