1
0
mirror of https://github.com/systemd/systemd.git synced 2024-11-02 19:21:53 +03:00

udevd: kill hanging event processes after 30 seconds

Some broken kernel drivers load firmware synchronously in the module init
path and block modprobe until the firmware request is fulfilled.

The modprobe-generated firmware request is a direct child device of the
device which caused modprobe to run. Child device events are blocked until
the parent device is handled. This deadlocks until the kernel firmware
loading timeout of 60 seconds is reached.

The hanging modprobe event should now time out and allow the firmware
event to run before the 60 second kernel timeout.
This commit is contained in:
Kay Sievers 2012-01-18 05:06:18 +01:00
parent 1b9e13e2e2
commit e64fae5573
2 changed files with 51 additions and 13 deletions

View File

@ -49,7 +49,7 @@ struct udev_event *udev_event_new(struct udev_device *dev)
udev_list_init(udev, &event->run_list, false); udev_list_init(udev, &event->run_list, false);
event->fd_signal = -1; event->fd_signal = -1;
event->birth_usec = now_usec(); event->birth_usec = now_usec();
event->timeout_usec = 60 * 1000 * 1000; event->timeout_usec = 30 * 1000 * 1000;
dbg(event->udev, "allocated event %p\n", event); dbg(event->udev, "allocated event %p\n", event);
return event; return event;
} }

View File

@ -133,6 +133,7 @@ struct worker {
struct udev_monitor *monitor; struct udev_monitor *monitor;
enum worker_state state; enum worker_state state;
struct event *event; struct event *event;
unsigned long long event_start_usec;
}; };
/* passed from worker to main process */ /* passed from worker to main process */
@ -372,6 +373,7 @@ out:
close(fd_inotify); close(fd_inotify);
close(worker_watch[WRITE_END]); close(worker_watch[WRITE_END]);
udev_rules_unref(rules); udev_rules_unref(rules);
udev_builtin_exit(udev);
udev_monitor_unref(worker_monitor); udev_monitor_unref(worker_monitor);
udev_unref(udev); udev_unref(udev);
udev_log_close(); udev_log_close();
@ -389,6 +391,7 @@ out:
worker->monitor = worker_monitor; worker->monitor = worker_monitor;
worker->pid = pid; worker->pid = pid;
worker->state = WORKER_RUNNING; worker->state = WORKER_RUNNING;
worker->event_start_usec = now_usec();
worker->event = event; worker->event = event;
event->state = EVENT_RUNNING; event->state = EVENT_RUNNING;
udev_list_node_append(&worker->node, &worker_list); udev_list_node_append(&worker->node, &worker_list);
@ -419,6 +422,7 @@ static void event_run(struct event *event)
worker_ref(worker); worker_ref(worker);
worker->event = event; worker->event = event;
worker->state = WORKER_RUNNING; worker->state = WORKER_RUNNING;
worker->event_start_usec = now_usec();
event->state = EVENT_RUNNING; event->state = EVENT_RUNNING;
return; return;
} }
@ -610,9 +614,11 @@ static void worker_returned(int fd_worker)
continue; continue;
/* worker returned */ /* worker returned */
worker->event->exitcode = msg.exitcode; if (worker->event) {
event_queue_delete(worker->event, true); worker->event->exitcode = msg.exitcode;
worker->event = NULL; event_queue_delete(worker->event, true);
worker->event = NULL;
}
if (worker->state != WORKER_KILLED) if (worker->state != WORKER_KILLED)
worker->state = WORKER_IDLE; worker->state = WORKER_IDLE;
worker_unref(worker); worker_unref(worker);
@ -796,7 +802,7 @@ static void handle_signal(struct udev *udev, int signo)
} }
if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
if (worker->event != NULL) { if (worker->event) {
err(udev, "worker [%u] failed while handling '%s'\n", err(udev, "worker [%u] failed while handling '%s'\n",
pid, worker->event->devpath); pid, worker->event->devpath);
worker->event->exitcode = -32; worker->event->exitcode = -32;
@ -1574,25 +1580,57 @@ int main(int argc, char *argv[])
break; break;
/* timeout at exit for workers to finish */ /* timeout at exit for workers to finish */
timeout = 60 * 1000; timeout = 30 * 1000;
} else if (udev_list_node_is_empty(&event_list) && children > 2) { } else if (udev_list_node_is_empty(&event_list) && children <= 2) {
/* set timeout to kill idle workers */ /* we are idle */
timeout = 3 * 1000;
} else {
timeout = -1; timeout = -1;
} else {
/* kill idle or hanging workers */
timeout = 3 * 1000;
} }
fdcount = epoll_wait(fd_ep, ev, ARRAY_SIZE(ev), timeout); fdcount = epoll_wait(fd_ep, ev, ARRAY_SIZE(ev), timeout);
if (fdcount < 0) if (fdcount < 0)
continue; continue;
if (fdcount == 0) { if (fdcount == 0) {
struct udev_list_node *loop;
/* timeout */
if (udev_exit) { if (udev_exit) {
info(udev, "timeout, giving up waiting for workers to finish\n"); err(udev, "timeout, giving up waiting for workers to finish\n");
break; break;
} }
/* timeout - kill idle workers */ /* kill idle workers */
worker_kill(udev, 2); if (udev_list_node_is_empty(&event_list)) {
info(udev, "cleanup idle workers\n");
worker_kill(udev, 2);
}
/* check for hanging events */
udev_list_node_foreach(loop, &worker_list) {
struct worker *worker = node_to_worker(loop);
if (worker->state != WORKER_RUNNING)
continue;
if ((now_usec() - worker->event_start_usec) > 30 * 1000 * 1000) {
err(udev, "worker [%u] timeout, kill it\n", worker->pid,
worker->event ? worker->event->devpath : "<idle>");
kill(worker->pid, SIGKILL);
worker->state = WORKER_KILLED;
/* drop reference taken for state 'running' */
worker_unref(worker);
if (worker->event) {
err(udev, "seq %llu '%s' killed\n",
udev_device_get_seqnum(worker->event->dev), worker->event->devpath);
worker->event->exitcode = -64;
event_queue_delete(worker->event, true);
worker->event = NULL;
}
}
}
} }
is_worker = is_signal = is_inotify = is_netlink = is_ctrl = false; is_worker = is_signal = is_inotify = is_netlink = is_ctrl = false;