epoll: use RCU to protect wakeup_source in epitem
This prevents the wakeup_source from being destroyed while ep_poll_callback is still running, which could otherwise happen when a user modifies the item with EPOLL_CTL_MOD.

Tested with CONFIG_SPARSE_RCU_POINTER=y and "make fs/eventpoll.o C=2".

Signed-off-by: Eric Wong <normalperson@yhbt.net>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Arve Hjønnevåg <arve@android.com>
Cc: Davide Libenzi <davidel@xmailserver.org>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: NeilBrown <neilb@suse.de>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
39732ca5af
commit
eea1d58591
@ -160,7 +160,7 @@ struct epitem {
|
||||
struct list_head fllink;
|
||||
|
||||
/* wakeup_source used when EPOLLWAKEUP is set */
|
||||
struct wakeup_source *ws;
|
||||
struct wakeup_source __rcu *ws;
|
||||
|
||||
/* The structure that describes the interested events and the source fd */
|
||||
struct epoll_event event;
|
||||
@ -538,6 +538,38 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
|
||||
}
|
||||
}
|
||||
|
||||
/* call only when ep->mtx is held */
|
||||
static inline struct wakeup_source *ep_wakeup_source(struct epitem *epi)
|
||||
{
|
||||
return rcu_dereference_check(epi->ws, lockdep_is_held(&epi->ep->mtx));
|
||||
}
|
||||
|
||||
/*
 * Mark the wakeup source of @epi as active, if the item has one.
 *
 * Call only when ep->mtx is held (the pointer is fetched through
 * ep_wakeup_source()).
 */
static inline void ep_pm_stay_awake(struct epitem *epi)
{
	struct wakeup_source *source = ep_wakeup_source(epi);

	/* Items without a wakeup source have nothing to keep awake. */
	if (!source)
		return;

	__pm_stay_awake(source);
}
|
||||
|
||||
static inline bool ep_has_wakeup_source(struct epitem *epi)
|
||||
{
|
||||
return rcu_access_pointer(epi->ws) ? true : false;
|
||||
}
|
||||
|
||||
/* call when ep->mtx cannot be held (ep_poll_callback) */
|
||||
static inline void ep_pm_stay_awake_rcu(struct epitem *epi)
|
||||
{
|
||||
struct wakeup_source *ws;
|
||||
|
||||
rcu_read_lock();
|
||||
ws = rcu_dereference(epi->ws);
|
||||
if (ws)
|
||||
__pm_stay_awake(ws);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/**
|
||||
* ep_scan_ready_list - Scans the ready list in a way that makes it possible for
|
||||
* the scan code to call f_op->poll(). Also allows for
|
||||
@ -601,7 +633,7 @@ static int ep_scan_ready_list(struct eventpoll *ep,
|
||||
*/
|
||||
if (!ep_is_linked(&epi->rdllink)) {
|
||||
list_add_tail(&epi->rdllink, &ep->rdllist);
|
||||
__pm_stay_awake(epi->ws);
|
||||
ep_pm_stay_awake(epi);
|
||||
}
|
||||
}
|
||||
/*
|
||||
@ -670,7 +702,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
|
||||
list_del_init(&epi->rdllink);
|
||||
spin_unlock_irqrestore(&ep->lock, flags);
|
||||
|
||||
wakeup_source_unregister(epi->ws);
|
||||
wakeup_source_unregister(ep_wakeup_source(epi));
|
||||
|
||||
/* At this point it is safe to free the eventpoll item */
|
||||
kmem_cache_free(epi_cache, epi);
|
||||
@ -754,7 +786,7 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
|
||||
* callback, but it's not actually ready, as far as
|
||||
* caller requested events goes. We can remove it here.
|
||||
*/
|
||||
__pm_relax(epi->ws);
|
||||
__pm_relax(ep_wakeup_source(epi));
|
||||
list_del_init(&epi->rdllink);
|
||||
}
|
||||
}
|
||||
@ -986,7 +1018,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
|
||||
/* If this file is already in the ready list we exit soon */
|
||||
if (!ep_is_linked(&epi->rdllink)) {
|
||||
list_add_tail(&epi->rdllink, &ep->rdllist);
|
||||
__pm_stay_awake(epi->ws);
|
||||
ep_pm_stay_awake_rcu(epi);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1148,6 +1180,7 @@ static int reverse_path_check(void)
|
||||
static int ep_create_wakeup_source(struct epitem *epi)
|
||||
{
|
||||
const char *name;
|
||||
struct wakeup_source *ws;
|
||||
|
||||
if (!epi->ep->ws) {
|
||||
epi->ep->ws = wakeup_source_register("eventpoll");
|
||||
@ -1156,17 +1189,29 @@ static int ep_create_wakeup_source(struct epitem *epi)
|
||||
}
|
||||
|
||||
name = epi->ffd.file->f_path.dentry->d_name.name;
|
||||
epi->ws = wakeup_source_register(name);
|
||||
if (!epi->ws)
|
||||
ws = wakeup_source_register(name);
|
||||
|
||||
if (!ws)
|
||||
return -ENOMEM;
|
||||
rcu_assign_pointer(epi->ws, ws);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ep_destroy_wakeup_source(struct epitem *epi)
|
||||
/* rare code path, only used when EPOLL_CTL_MOD removes a wakeup source */
|
||||
static noinline void ep_destroy_wakeup_source(struct epitem *epi)
|
||||
{
|
||||
wakeup_source_unregister(epi->ws);
|
||||
epi->ws = NULL;
|
||||
struct wakeup_source *ws = ep_wakeup_source(epi);
|
||||
|
||||
rcu_assign_pointer(epi->ws, NULL);
|
||||
|
||||
/*
|
||||
* wait for ep_pm_stay_awake_rcu to finish, synchronize_rcu is
|
||||
* used internally by wakeup_source_remove, too (called by
|
||||
* wakeup_source_unregister), so we cannot use call_rcu
|
||||
*/
|
||||
synchronize_rcu();
|
||||
wakeup_source_unregister(ws);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1201,7 +1246,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
|
||||
if (error)
|
||||
goto error_create_wakeup_source;
|
||||
} else {
|
||||
epi->ws = NULL;
|
||||
RCU_INIT_POINTER(epi->ws, NULL);
|
||||
}
|
||||
|
||||
/* Initialize the poll table using the queue callback */
|
||||
@ -1249,7 +1294,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
|
||||
/* If the file is already "ready" we drop it inside the ready list */
|
||||
if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) {
|
||||
list_add_tail(&epi->rdllink, &ep->rdllist);
|
||||
__pm_stay_awake(epi->ws);
|
||||
ep_pm_stay_awake(epi);
|
||||
|
||||
/* Notify waiting tasks that events are available */
|
||||
if (waitqueue_active(&ep->wq))
|
||||
@ -1290,7 +1335,7 @@ error_unregister:
|
||||
list_del_init(&epi->rdllink);
|
||||
spin_unlock_irqrestore(&ep->lock, flags);
|
||||
|
||||
wakeup_source_unregister(epi->ws);
|
||||
wakeup_source_unregister(ep_wakeup_source(epi));
|
||||
|
||||
error_create_wakeup_source:
|
||||
kmem_cache_free(epi_cache, epi);
|
||||
@ -1319,9 +1364,9 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
|
||||
pt._key = event->events;
|
||||
epi->event.data = event->data; /* protected by mtx */
|
||||
if (epi->event.events & EPOLLWAKEUP) {
|
||||
if (!epi->ws)
|
||||
if (!ep_has_wakeup_source(epi))
|
||||
ep_create_wakeup_source(epi);
|
||||
} else if (epi->ws) {
|
||||
} else if (ep_has_wakeup_source(epi)) {
|
||||
ep_destroy_wakeup_source(epi);
|
||||
}
|
||||
|
||||
@ -1359,7 +1404,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
|
||||
spin_lock_irq(&ep->lock);
|
||||
if (!ep_is_linked(&epi->rdllink)) {
|
||||
list_add_tail(&epi->rdllink, &ep->rdllist);
|
||||
__pm_stay_awake(epi->ws);
|
||||
ep_pm_stay_awake(epi);
|
||||
|
||||
/* Notify waiting tasks that events are available */
|
||||
if (waitqueue_active(&ep->wq))
|
||||
@ -1385,6 +1430,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
|
||||
unsigned int revents;
|
||||
struct epitem *epi;
|
||||
struct epoll_event __user *uevent;
|
||||
struct wakeup_source *ws;
|
||||
poll_table pt;
|
||||
|
||||
init_poll_funcptr(&pt, NULL);
|
||||
@ -1407,9 +1453,13 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
|
||||
* instead, but then epi->ws would temporarily be out of sync
|
||||
* with ep_is_linked().
|
||||
*/
|
||||
if (epi->ws && epi->ws->active)
|
||||
__pm_stay_awake(ep->ws);
|
||||
__pm_relax(epi->ws);
|
||||
ws = ep_wakeup_source(epi);
|
||||
if (ws) {
|
||||
if (ws->active)
|
||||
__pm_stay_awake(ep->ws);
|
||||
__pm_relax(ws);
|
||||
}
|
||||
|
||||
list_del_init(&epi->rdllink);
|
||||
|
||||
pt._key = epi->event.events;
|
||||
@ -1426,7 +1476,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
|
||||
if (__put_user(revents, &uevent->events) ||
|
||||
__put_user(epi->event.data, &uevent->data)) {
|
||||
list_add(&epi->rdllink, head);
|
||||
__pm_stay_awake(epi->ws);
|
||||
ep_pm_stay_awake(epi);
|
||||
return eventcnt ? eventcnt : -EFAULT;
|
||||
}
|
||||
eventcnt++;
|
||||
@ -1446,7 +1496,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
|
||||
* poll callback will queue them in ep->ovflist.
|
||||
*/
|
||||
list_add_tail(&epi->rdllink, &ep->rdllist);
|
||||
__pm_stay_awake(epi->ws);
|
||||
ep_pm_stay_awake(epi);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user