From fed1755b118147721f2c87b37b9d66e62c39b668 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 14 Dec 2020 10:02:45 +0100 Subject: [PATCH 1/6] xen/xenbus: Allow watches discard events before queueing If handling logics of watch events are slower than the events enqueue logic and the events can be created from the guests, the guests could trigger memory pressure by intensively inducing the events, because it will create a huge number of pending events that exhausting the memory. Fortunately, some watch events could be ignored, depending on its handler callback. For example, if the callback has interest in only one single path, the watch wouldn't want multiple pending events. Or, some watches could ignore events to same path. To let such watches to volutarily help avoiding the memory pressure situation, this commit introduces new watch callback, 'will_handle'. If it is not NULL, it will be called for each new event just before enqueuing it. Then, if the callback returns false, the event will be discarded. No watch is using the callback for now, though. This is part of XSA-349 Cc: stable@vger.kernel.org Signed-off-by: SeongJae Park Reported-by: Michael Kurth Reported-by: Pawel Wieczorkiewicz Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross --- drivers/net/xen-netback/xenbus.c | 4 ++++ drivers/xen/xenbus/xenbus_client.c | 1 + drivers/xen/xenbus/xenbus_xs.c | 5 ++++- include/xen/xenbus.h | 7 +++++++ 4 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index f1c1624cec8f..00f6f8dc56c8 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -557,12 +557,14 @@ static int xen_register_credit_watch(struct xenbus_device *dev, return -ENOMEM; snprintf(node, maxlen, "%s/rate", dev->nodename); vif->credit_watch.node = node; + vif->credit_watch.will_handle = NULL; vif->credit_watch.callback = xen_net_rate_changed; err = register_xenbus_watch(&vif->credit_watch); if (err) { pr_err("Failed to set watcher %s\n", vif->credit_watch.node); kfree(node); vif->credit_watch.node = NULL; + vif->credit_watch.will_handle = NULL; vif->credit_watch.callback = NULL; } return err; @@ -609,6 +611,7 @@ static int xen_register_mcast_ctrl_watch(struct xenbus_device *dev, snprintf(node, maxlen, "%s/request-multicast-control", dev->otherend); vif->mcast_ctrl_watch.node = node; + vif->mcast_ctrl_watch.will_handle = NULL; vif->mcast_ctrl_watch.callback = xen_mcast_ctrl_changed; err = register_xenbus_watch(&vif->mcast_ctrl_watch); if (err) { @@ -616,6 +619,7 @@ static int xen_register_mcast_ctrl_watch(struct xenbus_device *dev, vif->mcast_ctrl_watch.node); kfree(node); vif->mcast_ctrl_watch.node = NULL; + vif->mcast_ctrl_watch.will_handle = NULL; vif->mcast_ctrl_watch.callback = NULL; } return err; diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index fd80e318b99c..0a21a12d9c34 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -133,6 +133,7 @@ int xenbus_watch_path(struct xenbus_device *dev, const char *path, int err; watch->node = path; + watch->will_handle = NULL; watch->callback = callback; err = register_xenbus_watch(watch); diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index 3a06eb699f33..e8bdbd0a1e26 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -705,7 +705,10 @@ int xs_watch_msg(struct xs_watch_event *event) spin_lock(&watches_lock); event->handle = find_watch(event->token); - if (event->handle != NULL) { + if (event->handle != NULL && + (!event->handle->will_handle || + event->handle->will_handle(event->handle, + event->path, event->token))) { spin_lock(&watch_events_lock); list_add_tail(&event->list, &watch_events); wake_up(&watch_events_waitq); diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index 5a8315e6d8a6..baa88bf0b9bc 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -61,6 +61,13 @@ struct xenbus_watch /* Path being watched. */ const char *node; + /* + * Called just before enqueing new event while a spinlock is held. + * The event will be discarded if this callback returns false. + */ + bool (*will_handle)(struct xenbus_watch *, + const char *path, const char *token); + /* Callback (executed in a process context with no locks held). */ void (*callback)(struct xenbus_watch *, const char *path, const char *token); From 2e85d32b1c865bec703ce0c962221a5e955c52c2 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 14 Dec 2020 10:04:18 +0100 Subject: [PATCH 2/6] xen/xenbus: Add 'will_handle' callback support in xenbus_watch_path() Some code does not directly make 'xenbus_watch' object and call 'register_xenbus_watch()' but use 'xenbus_watch_path()' instead. This commit adds support of 'will_handle' callback in the 'xenbus_watch_path()' and it's wrapper, 'xenbus_watch_pathfmt()'. This is part of XSA-349 Cc: stable@vger.kernel.org Signed-off-by: SeongJae Park Reported-by: Michael Kurth Reported-by: Pawel Wieczorkiewicz Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross --- drivers/block/xen-blkback/xenbus.c | 3 ++- drivers/net/xen-netback/xenbus.c | 2 +- drivers/xen/xen-pciback/xenbus.c | 2 +- drivers/xen/xenbus/xenbus_client.c | 9 +++++++-- drivers/xen/xenbus/xenbus_probe.c | 2 +- include/xen/xenbus.h | 6 +++++- 6 files changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 76912c584a76..1d8b8d24496c 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -675,7 +675,8 @@ static int xen_blkbk_probe(struct xenbus_device *dev, /* setup back pointer */ be->blkif->be = be; - err = xenbus_watch_pathfmt(dev, &be->backend_watch, backend_changed, + err = xenbus_watch_pathfmt(dev, &be->backend_watch, NULL, + backend_changed, "%s/%s", dev->nodename, "physical-device"); if (err) goto fail; diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 00f6f8dc56c8..6f10e0998f1c 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -824,7 +824,7 @@ static void connect(struct backend_info *be) xenvif_carrier_on(be->vif); unregister_hotplug_status_watch(be); - err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch, + err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch, NULL, hotplug_status_changed, "%s/%s", dev->nodename, "hotplug-status"); if (!err) diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index 4b99ec3dec58..e7c692cfb2cf 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -689,7 +689,7 @@ static int xen_pcibk_xenbus_probe(struct xenbus_device *dev, /* watch the backend node for backend configuration information */ err = xenbus_watch_path(dev, dev->nodename, &pdev->be_watch, - xen_pcibk_be_watch); + NULL, xen_pcibk_be_watch); if (err) goto out; diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 0a21a12d9c34..0cd728961fce 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -127,19 +127,22 @@ EXPORT_SYMBOL_GPL(xenbus_strstate); */ int xenbus_watch_path(struct xenbus_device *dev, const char *path, struct xenbus_watch *watch, + bool (*will_handle)(struct xenbus_watch *, + const char *, const char *), void (*callback)(struct xenbus_watch *, const char *, const char *)) { int err; watch->node = path; - watch->will_handle = NULL; + watch->will_handle = will_handle; watch->callback = callback; err = register_xenbus_watch(watch); if (err) { watch->node = NULL; + watch->will_handle = NULL; watch->callback = NULL; xenbus_dev_fatal(dev, err, "adding watch on %s", path); } @@ -166,6 +169,8 @@ EXPORT_SYMBOL_GPL(xenbus_watch_path); */ int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch, + bool (*will_handle)(struct xenbus_watch *, + const char *, const char *), void (*callback)(struct xenbus_watch *, const char *, const char *), const char *pathfmt, ...) @@ -182,7 +187,7 @@ int xenbus_watch_pathfmt(struct xenbus_device *dev, xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch"); return -ENOMEM; } - err = xenbus_watch_path(dev, path, watch, callback); + err = xenbus_watch_path(dev, path, watch, will_handle, callback); if (err) kfree(path); diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 38725d97d909..4c3d1b84aa0a 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -136,7 +136,7 @@ static int watch_otherend(struct xenbus_device *dev) container_of(dev->dev.bus, struct xen_bus_type, bus); return xenbus_watch_pathfmt(dev, &dev->otherend_watch, - bus->otherend_changed, + NULL, bus->otherend_changed, "%s/%s", dev->otherend, "state"); } diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index baa88bf0b9bc..c8574d1b814c 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -204,10 +204,14 @@ void xenbus_probe(struct work_struct *); int xenbus_watch_path(struct xenbus_device *dev, const char *path, struct xenbus_watch *watch, + bool (*will_handle)(struct xenbus_watch *, + const char *, const char *), void (*callback)(struct xenbus_watch *, const char *, const char *)); -__printf(4, 5) +__printf(5, 6) int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch, + bool (*will_handle)(struct xenbus_watch *, + const char *, const char *), void (*callback)(struct xenbus_watch *, const char *, const char *), const char *pathfmt, ...); From be987200fbaceaef340872841d4f7af2c5ee8dc3 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 14 Dec 2020 10:05:47 +0100 Subject: [PATCH 3/6] xen/xenbus/xen_bus_type: Support will_handle watch callback This commit adds support of the 'will_handle' watch callback for 'xen_bus_type' users. This is part of XSA-349 Cc: stable@vger.kernel.org Signed-off-by: SeongJae Park Reported-by: Michael Kurth Reported-by: Pawel Wieczorkiewicz Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross --- drivers/xen/xenbus/xenbus.h | 2 ++ drivers/xen/xenbus/xenbus_probe.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/xen/xenbus/xenbus.h b/drivers/xen/xenbus/xenbus.h index 5f5b8a7d5b80..2a93b7c9c159 100644 --- a/drivers/xen/xenbus/xenbus.h +++ b/drivers/xen/xenbus/xenbus.h @@ -44,6 +44,8 @@ struct xen_bus_type { int (*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char *nodename); int (*probe)(struct xen_bus_type *bus, const char *type, const char *dir); + bool (*otherend_will_handle)(struct xenbus_watch *watch, + const char *path, const char *token); void (*otherend_changed)(struct xenbus_watch *watch, const char *path, const char *token); struct bus_type bus; diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 4c3d1b84aa0a..44634d970a5c 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -136,7 +136,8 @@ static int watch_otherend(struct xenbus_device *dev) container_of(dev->dev.bus, struct xen_bus_type, bus); return xenbus_watch_pathfmt(dev, &dev->otherend_watch, - NULL, bus->otherend_changed, + bus->otherend_will_handle, + bus->otherend_changed, "%s/%s", dev->otherend, "state"); } From 3dc86ca6b4c8cfcba9da7996189d1b5a358a94fc Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 14 Dec 2020 10:07:13 +0100 Subject: [PATCH 4/6] xen/xenbus: Count pending messages for each watch This commit adds a counter of pending messages for each watch in the struct. It is used to skip unnecessary pending messages lookup in 'unregister_xenbus_watch()'. It could also be used in 'will_handle' callback. This is part of XSA-349 Cc: stable@vger.kernel.org Signed-off-by: SeongJae Park Reported-by: Michael Kurth Reported-by: Pawel Wieczorkiewicz Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross --- drivers/xen/xenbus/xenbus_xs.c | 29 ++++++++++++++++++----------- include/xen/xenbus.h | 2 ++ 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index e8bdbd0a1e26..12e02eb01f59 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -711,6 +711,7 @@ int xs_watch_msg(struct xs_watch_event *event) event->path, event->token))) { spin_lock(&watch_events_lock); list_add_tail(&event->list, &watch_events); + event->handle->nr_pending++; wake_up(&watch_events_waitq); spin_unlock(&watch_events_lock); } else @@ -768,6 +769,8 @@ int register_xenbus_watch(struct xenbus_watch *watch) sprintf(token, "%lX", (long)watch); + watch->nr_pending = 0; + down_read(&xs_watch_rwsem); spin_lock(&watches_lock); @@ -817,11 +820,14 @@ void unregister_xenbus_watch(struct xenbus_watch *watch) /* Cancel pending watch events. */ spin_lock(&watch_events_lock); - list_for_each_entry_safe(event, tmp, &watch_events, list) { - if (event->handle != watch) - continue; - list_del(&event->list); - kfree(event); + if (watch->nr_pending) { + list_for_each_entry_safe(event, tmp, &watch_events, list) { + if (event->handle != watch) + continue; + list_del(&event->list); + kfree(event); + } + watch->nr_pending = 0; } spin_unlock(&watch_events_lock); @@ -868,7 +874,6 @@ void xs_suspend_cancel(void) static int xenwatch_thread(void *unused) { - struct list_head *ent; struct xs_watch_event *event; xenwatch_pid = current->pid; @@ -883,13 +888,15 @@ static int xenwatch_thread(void *unused) mutex_lock(&xenwatch_mutex); spin_lock(&watch_events_lock); - ent = watch_events.next; - if (ent != &watch_events) - list_del(ent); + event = list_first_entry_or_null(&watch_events, + struct xs_watch_event, list); + if (event) { + list_del(&event->list); + event->handle->nr_pending--; + } spin_unlock(&watch_events_lock); - if (ent != &watch_events) { - event = list_entry(ent, struct xs_watch_event, list); + if (event) { event->handle->callback(event->handle, event->path, event->token); kfree(event); diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index c8574d1b814c..00c7235ae93e 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -61,6 +61,8 @@ struct xenbus_watch /* Path being watched. */ const char *node; + unsigned int nr_pending; + /* * Called just before enqueing new event while a spinlock is held. * The event will be discarded if this callback returns false. From 9996bd494794a2fe393e97e7a982388c6249aa76 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 14 Dec 2020 10:08:40 +0100 Subject: [PATCH 5/6] xenbus/xenbus_backend: Disallow pending watch messages 'xenbus_backend' watches 'state' of devices, which is writable by guests. Hence, if guests intensively updates it, dom0 will have lots of pending events that exhausting memory of dom0. In other words, guests can trigger dom0 memory pressure. This is known as XSA-349. However, the watch callback of it, 'frontend_changed()', reads only 'state', so doesn't need to have the pending events. To avoid the problem, this commit disallows pending watch messages for 'xenbus_backend' using the 'will_handle()' watch callback. This is part of XSA-349 Cc: stable@vger.kernel.org Signed-off-by: SeongJae Park Reported-by: Michael Kurth Reported-by: Pawel Wieczorkiewicz Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross --- drivers/xen/xenbus/xenbus_probe_backend.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c index 2ba699897e6d..5abded97e1a7 100644 --- a/drivers/xen/xenbus/xenbus_probe_backend.c +++ b/drivers/xen/xenbus/xenbus_probe_backend.c @@ -180,6 +180,12 @@ static int xenbus_probe_backend(struct xen_bus_type *bus, const char *type, return err; } +static bool frontend_will_handle(struct xenbus_watch *watch, + const char *path, const char *token) +{ + return watch->nr_pending == 0; +} + static void frontend_changed(struct xenbus_watch *watch, const char *path, const char *token) { @@ -191,6 +197,7 @@ static struct xen_bus_type xenbus_backend = { .levels = 3, /* backend/type// */ .get_bus_id = backend_bus_id, .probe = xenbus_probe_backend, + .otherend_will_handle = frontend_will_handle, .otherend_changed = frontend_changed, .bus = { .name = "xen-backend", From 1c728719a4da6e654afb9cc047164755072ed7c9 Mon Sep 17 00:00:00 2001 From: Pawel Wieczorkiewicz Date: Mon, 14 Dec 2020 10:25:57 +0100 Subject: [PATCH 6/6] xen-blkback: set ring->xenblkd to NULL after kthread_stop() When xen_blkif_disconnect() is called, the kernel thread behind the block interface is stopped by calling kthread_stop(ring->xenblkd). The ring->xenblkd thread pointer being non-NULL determines if the thread has been already stopped. Normally, the thread's function xen_blkif_schedule() sets the ring->xenblkd to NULL, when the thread's main loop ends. However, when the thread has not been started yet (i.e. wake_up_process() has not been called on it), the xen_blkif_schedule() function would not be called yet. In such case the kthread_stop() call returns -EINTR and the ring->xenblkd remains dangling. When this happens, any consecutive call to xen_blkif_disconnect (for example in frontend_changed() callback) leads to a kernel crash in kthread_stop() (e.g. NULL pointer dereference in exit_creds()). This is XSA-350. Cc: # 4.12 Fixes: a24fa22ce22a ("xen/blkback: don't use xen_blkif_get() in xen-blkback kthread") Reported-by: Olivier Benjamin Reported-by: Pawel Wieczorkiewicz Signed-off-by: Pawel Wieczorkiewicz Reviewed-by: Julien Grall Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross --- drivers/block/xen-blkback/xenbus.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 1d8b8d24496c..9860d4842f36 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -274,6 +274,7 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif) if (ring->xenblkd) { kthread_stop(ring->xenblkd); + ring->xenblkd = NULL; wake_up(&ring->shutdown_wq); }