From cbfb8679dd5812a43252b9665f472a6cfeb3768f Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Mon, 20 Nov 2017 15:29:53 +0100
Subject: [PATCH 1/4] mount-util: add name_to_handle_at_loop() wrapper around
 name_to_handle_at()

As it turns out MAX_HANDLE_SZ is a lie, the handle buffer we pass into
name_to_handle_at() might need to be larger than MAX_HANDLE_SZ, and we
thus need to invoke name_to_handle_at() in a loop, growing the buffer as
needed.

This adds a new wrapper name_to_handle_at_loop() around
name_to_handle_at() that does the necessary looping, and ports over all
users.

Fixes: #7082
---
 src/basic/mount-util.c        | 133 +++++++++++++++++++++++++---------
 src/basic/mount-util.h        |   9 +--
 src/libudev/libudev-monitor.c |   3 +-
 src/tmpfiles/tmpfiles.c       |   6 +-
 4 files changed, 102 insertions(+), 49 deletions(-)

diff --git a/src/basic/mount-util.c b/src/basic/mount-util.c
index 8801e2f8f1f..9fb96e6b6d7 100644
--- a/src/basic/mount-util.c
+++ b/src/basic/mount-util.c
@@ -40,6 +40,76 @@
 #include "string-util.h"
 #include "strv.h"
 
+int name_to_handle_at_loop(
+                int fd,
+                const char *path,
+                struct file_handle **ret_handle,
+                int *ret_mnt_id,
+                int flags) {
+
+        _cleanup_free_ struct file_handle *h;
+        size_t n = MAX_HANDLE_SZ;
+
+        /* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified
+         * buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a
+         * start value, it is not an upper bound on the buffer size required.
+         *
+         * This improves on raw name_to_handle_at() also in one other regard: ret_handle and ret_mnt_id can be passed
+         * as NULL if there's no interest in either. */
+
+        h = malloc0(offsetof(struct file_handle, f_handle) + n);
+        if (!h)
+                return -ENOMEM;
+
+        h->handle_bytes = n;
+
+        for (;;) {
+                int mnt_id = -1;
+                size_t m;
+
+                if (name_to_handle_at(fd, path, h, &mnt_id, flags) >= 0) {
+
+                        if (ret_handle) {
+                                *ret_handle = h;
+                                h = NULL;
+                        }
+
+                        if (ret_mnt_id)
+                                *ret_mnt_id = mnt_id;
+
+                        return 0;
+                }
+                if (errno != EOVERFLOW)
+                        return -errno;
+
+                if (!ret_handle && ret_mnt_id && mnt_id >= 0) {
+
+                        /* As it appears, name_to_handle_at() fills in mnt_id even when it returns EOVERFLOW, but
+                         * that's undocumented. Hence, let's make use of this if it appears to be filled in, and the
+                         * caller was interested in only the mount ID an nothing else. */
+
+                        *ret_mnt_id = mnt_id;
+                        return 0;
+                }
+
+                /* The buffer was too small. Size the new buffer by what name_to_handle_at() returned, but make sure it
+                 * is always larger than what we passed in before */
+                m = h->handle_bytes > n ? h->handle_bytes : n * 2;
+                if (m < n) /* Check for multiplication overflow */
+                        return -EOVERFLOW;
+                if (offsetof(struct file_handle, f_handle) + m < m) /* check for addition overflow */
+                        return -EOVERFLOW;
+                n = m;
+
+                free(h);
+                h = malloc0(offsetof(struct file_handle, f_handle) + n);
+                if (!h)
+                        return -ENOMEM;
+
+                h->handle_bytes = n;
+        }
+}
+
 static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) {
         char path[strlen("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
         _cleanup_free_ char *fdinfo = NULL;
@@ -79,7 +149,7 @@ static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id
 }
 
 int fd_is_mount_point(int fd, const char *filename, int flags) {
-        union file_handle_union h = FILE_HANDLE_INIT, h_parent = FILE_HANDLE_INIT;
+        _cleanup_free_ struct file_handle *h = NULL, *h_parent = NULL;
         int mount_id = -1, mount_id_parent = -1;
         bool nosupp = false, check_st_dev = true;
         struct stat a, b;
@@ -111,39 +181,30 @@ int fd_is_mount_point(int fd, const char *filename, int flags) {
          * subvolumes have different st_dev, even though they aren't
          * real mounts of their own. */
 
-        r = name_to_handle_at(fd, filename, &h.handle, &mount_id, flags);
-        if (r < 0) {
-                if (IN_SET(errno, ENOSYS, EACCES, EPERM))
-                        /* This kernel does not support name_to_handle_at() at all, or the syscall was blocked (maybe
-                         * through seccomp, because we are running inside of a container?): fall back to simpler
-                         * logic. */
-                        goto fallback_fdinfo;
-                else if (errno == EOPNOTSUPP)
-                        /* This kernel or file system does not support
-                         * name_to_handle_at(), hence let's see if the
-                         * upper fs supports it (in which case it is a
-                         * mount point), otherwise fallback to the
-                         * traditional stat() logic */
-                        nosupp = true;
-                else
-                        return -errno;
-        }
+        r = name_to_handle_at_loop(fd, filename, &h, &mount_id, flags);
+        if (IN_SET(r, -ENOSYS, -EACCES, -EPERM))
+                /* This kernel does not support name_to_handle_at() at all, or the syscall was blocked (maybe through
+                 * seccomp, because we are running inside of a container?): fall back to simpler logic. */
+                goto fallback_fdinfo;
+        else if (r == -EOPNOTSUPP)
+                /* This kernel or file system does not support name_to_handle_at(), hence let's see if the upper fs
+                 * supports it (in which case it is a mount point), otherwise fallback to the traditional stat()
+                 * logic */
+                nosupp = true;
+        else if (r < 0)
+                return r;
 
-        r = name_to_handle_at(fd, "", &h_parent.handle, &mount_id_parent, AT_EMPTY_PATH);
-        if (r < 0) {
-                if (errno == EOPNOTSUPP) {
-                        if (nosupp)
-                                /* Neither parent nor child do name_to_handle_at()?
-                                   We have no choice but to fall back. */
-                                goto fallback_fdinfo;
-                        else
-                                /* The parent can't do name_to_handle_at() but the
-                                 * directory we are interested in can?
-                                 * If so, it must be a mount point. */
-                                return 1;
-                } else
-                        return -errno;
-        }
+        r = name_to_handle_at_loop(fd, "", &h_parent, &mount_id_parent, AT_EMPTY_PATH);
+        if (r == -EOPNOTSUPP) {
+                if (nosupp)
+                        /* Neither parent nor child do name_to_handle_at()?  We have no choice but to fall back. */
+                        goto fallback_fdinfo;
+                else
+                        /* The parent can't do name_to_handle_at() but the directory we are interested in can?  If so,
+                         * it must be a mount point. */
+                        return 1;
+        } else if (r < 0)
+                       return r;
 
         /* The parent can do name_to_handle_at() but the
          * directory we are interested in can't? If so, it
@@ -156,9 +217,9 @@ int fd_is_mount_point(int fd, const char *filename, int flags) {
          * assume this is the root directory, which is a mount
          * point. */
 
-        if (h.handle.handle_bytes == h_parent.handle.handle_bytes &&
-            h.handle.handle_type == h_parent.handle.handle_type &&
-            memcmp(h.handle.f_handle, h_parent.handle.f_handle, h.handle.handle_bytes) == 0)
+        if (h->handle_bytes == h_parent->handle_bytes &&
+            h->handle_type == h_parent->handle_type &&
+            memcmp(h->f_handle, h_parent->f_handle, h->handle_bytes) == 0)
                 return 1;
 
         return mount_id != mount_id_parent;
diff --git a/src/basic/mount-util.h b/src/basic/mount-util.h
index 0f53fee2f23..453bf9a0a55 100644
--- a/src/basic/mount-util.h
+++ b/src/basic/mount-util.h
@@ -30,6 +30,8 @@
 #include "macro.h"
 #include "missing.h"
 
+int name_to_handle_at_loop(int fd, const char *path, struct file_handle **ret_handle, int *ret_mnt_id, int flags);
+
 int fd_is_mount_point(int fd, const char *filename, int flags);
 int path_is_mount_point(const char *path, const char *root, int flags);
 
@@ -49,15 +51,8 @@ bool fstype_is_api_vfs(const char *fstype);
 bool fstype_is_ro(const char *fsype);
 bool fstype_can_discard(const char *fstype);
 
-union file_handle_union {
-        struct file_handle handle;
-        char padding[sizeof(struct file_handle) + MAX_HANDLE_SZ];
-};
-
 const char* mode_to_inaccessible_node(mode_t mode);
 
-#define FILE_HANDLE_INIT { .handle.handle_bytes = MAX_HANDLE_SZ }
-
 int mount_verbose(
                 int error_log_level,
                 const char *what,
diff --git a/src/libudev/libudev-monitor.c b/src/libudev/libudev-monitor.c
index e43d7121955..68fd174a3a3 100644
--- a/src/libudev/libudev-monitor.c
+++ b/src/libudev/libudev-monitor.c
@@ -115,13 +115,12 @@ static struct udev_monitor *udev_monitor_new(struct udev *udev)
 /* we consider udev running when /dev is on devtmpfs */
 static bool udev_has_devtmpfs(struct udev *udev) {
 
-        union file_handle_union h = FILE_HANDLE_INIT;
         _cleanup_fclose_ FILE *f = NULL;
         char line[LINE_MAX], *e;
         int mount_id;
         int r;
 
-        r = name_to_handle_at(AT_FDCWD, "/dev", &h.handle, &mount_id, 0);
+        r = name_to_handle_at_loop(AT_FDCWD, "/dev", NULL, &mount_id, 0);
         if (r < 0) {
                 if (errno != EOPNOTSUPP)
                         log_debug_errno(errno, "name_to_handle_at on /dev: %m");
diff --git a/src/tmpfiles/tmpfiles.c b/src/tmpfiles/tmpfiles.c
index 173774ea0a0..b8d67960b6d 100644
--- a/src/tmpfiles/tmpfiles.c
+++ b/src/tmpfiles/tmpfiles.c
@@ -346,16 +346,14 @@ static bool unix_socket_alive(const char *fn) {
 
 static int dir_is_mount_point(DIR *d, const char *subdir) {
 
-        union file_handle_union h = FILE_HANDLE_INIT;
         int mount_id_parent, mount_id;
         int r_p, r;
 
-        r_p = name_to_handle_at(dirfd(d), ".", &h.handle, &mount_id_parent, 0);
+        r_p = name_to_handle_at_loop(dirfd(d), ".", NULL, &mount_id_parent, 0);
         if (r_p < 0)
                 r_p = -errno;
 
-        h.handle.handle_bytes = MAX_HANDLE_SZ;
-        r = name_to_handle_at(dirfd(d), subdir, &h.handle, &mount_id, 0);
+        r = name_to_handle_at_loop(dirfd(d), subdir, NULL, &mount_id, 0);
         if (r < 0)
                 r = -errno;
 

From c2a986d5091af4e9c6f43695ecbfd7519f86e465 Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Mon, 20 Nov 2017 16:05:41 +0100
Subject: [PATCH 2/4] mount-util: add new path_get_mnt_id() call that queries
 the mnt ID of a path

This is a simple wrapper around name_to_handle_at_loop() and
fd_fdinfo_mnt_id() to query the mnt ID of a path. It uses
name_to_handle_at() where it can, and falls back to to
fd_fdinfo_mnt_id() where that doesn't work.

This is a best-effort thing of course, since neither name_to_handle_at()
nor the fdinfo logic work on all kernels.
---
 src/basic/mount-util.c     | 10 ++++++
 src/basic/mount-util.h     |  2 ++
 src/test/test-mount-util.c | 66 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 78 insertions(+)

diff --git a/src/basic/mount-util.c b/src/basic/mount-util.c
index 9fb96e6b6d7..5f22bd68cec 100644
--- a/src/basic/mount-util.c
+++ b/src/basic/mount-util.c
@@ -302,6 +302,16 @@ int path_is_mount_point(const char *t, const char *root, int flags) {
         return fd_is_mount_point(fd, basename(t), flags);
 }
 
+int path_get_mnt_id(const char *path, int *ret) {
+        int r;
+
+        r = name_to_handle_at_loop(AT_FDCWD, path, NULL, ret, 0);
+        if (IN_SET(r, -EOPNOTSUPP, -ENOSYS, -EACCES, -EPERM)) /* kernel/fs don't support this, or seccomp blocks access */
+                return fd_fdinfo_mnt_id(AT_FDCWD, path, 0, ret);
+
+        return r;
+}
+
 int umount_recursive(const char *prefix, int flags) {
         bool again;
         int n = 0, r;
diff --git a/src/basic/mount-util.h b/src/basic/mount-util.h
index 453bf9a0a55..a19b0d9c421 100644
--- a/src/basic/mount-util.h
+++ b/src/basic/mount-util.h
@@ -32,6 +32,8 @@
 
 int name_to_handle_at_loop(int fd, const char *path, struct file_handle **ret_handle, int *ret_mnt_id, int flags);
 
+int path_get_mnt_id(const char *path, int *ret);
+
 int fd_is_mount_point(int fd, const char *filename, int flags);
 int path_is_mount_point(const char *path, const char *root, int flags);
 
diff --git a/src/test/test-mount-util.c b/src/test/test-mount-util.c
index e2a0b4dfc15..5a93da1c0d7 100644
--- a/src/test/test-mount-util.c
+++ b/src/test/test-mount-util.c
@@ -20,6 +20,11 @@
 
 #include <sys/mount.h>
 
+#include "alloc-util.h"
+#include "def.h"
+#include "fd-util.h"
+#include "fileio.h"
+#include "hashmap.h"
 #include "log.h"
 #include "mount-util.h"
 #include "string-util.h"
@@ -42,6 +47,65 @@ static void test_mount_propagation_flags(const char *name, int ret, unsigned lon
         }
 }
 
+static void test_mnt_id(void) {
+        _cleanup_fclose_ FILE *f = NULL;
+        Hashmap *h;
+        Iterator i;
+        char *k;
+        void *p;
+        int r;
+
+        assert_se(f = fopen("/proc/self/mountinfo", "re"));
+        assert_se(h = hashmap_new(&string_hash_ops));
+
+        for (;;) {
+                _cleanup_free_ char *line = NULL, *path = NULL;
+                void *old_key;
+                int mnt_id;
+
+                r = read_line(f, LONG_LINE_MAX, &line);
+                if (r == 0)
+                        break;
+                assert_se(r > 0);
+
+                assert_se(sscanf(line, "%i %*s %*s %*s %ms", &mnt_id, &path) == 2);
+
+                /* Add all mount points and their ids to a hashtable, so that we filter out mount points that are
+                 * overmounted. For those we only care for the "upper" mount, since that's the only one
+                 * path_get_mnt_id() can determine. */
+
+                if (hashmap_remove2(h, path, &old_key))
+                        free(old_key);
+
+                assert_se(hashmap_put(h, path, INT_TO_PTR(mnt_id)) >= 0);
+                path = NULL;
+        }
+
+        HASHMAP_FOREACH_KEY(p, k, h, i) {
+                int mnt_id = PTR_TO_INT(p), mnt_id2;
+
+                r = path_get_mnt_id(k, &mnt_id2);
+                if (r == -EOPNOTSUPP) { /* kernel or file system too old? */
+                        log_debug("%s doesn't support mount IDs\n", k);
+                        continue;
+                }
+                if (IN_SET(r, -EACCES, -EPERM)) {
+                        log_debug("Can't access %s\n", k);
+                        continue;
+                }
+
+                log_debug("mnt id of %s is %i\n", k, mnt_id2);
+
+                assert_se(r >= 0);
+                assert_se(mnt_id == mnt_id2);
+        }
+
+        while ((k = hashmap_steal_first_key(h)))
+                free(k);
+
+        hashmap_free(h);
+}
+
 int main(int argc, char *argv[]) {
 
         log_set_max_level(LOG_DEBUG);
@@ -54,5 +118,7 @@ int main(int argc, char *argv[]) {
         test_mount_propagation_flags("xxxx", -EINVAL, 0);
         test_mount_propagation_flags(" ", -EINVAL, 0);
 
+        test_mnt_id();
+
         return 0;
 }

From 654c87e0e6f30b4ac63940c3d5d88f5dd820e39d Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Mon, 20 Nov 2017 16:08:06 +0100
Subject: [PATCH 3/4] udev: port udev_has_devtmpfs() to use path_get_mnt_id()

This means there's a good chance the code also works on kernels that
lack name_to_handle_at().
---
 src/libudev/libudev-monitor.c | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/src/libudev/libudev-monitor.c b/src/libudev/libudev-monitor.c
index 68fd174a3a3..3c9f1f94967 100644
--- a/src/libudev/libudev-monitor.c
+++ b/src/libudev/libudev-monitor.c
@@ -96,8 +96,7 @@ struct udev_monitor_netlink_header {
         unsigned int filter_tag_bloom_lo;
 };
 
-static struct udev_monitor *udev_monitor_new(struct udev *udev)
-{
+static struct udev_monitor *udev_monitor_new(struct udev *udev) {
         struct udev_monitor *udev_monitor;
 
         udev_monitor = new0(struct udev_monitor, 1);
@@ -117,13 +116,13 @@ static bool udev_has_devtmpfs(struct udev *udev) {
 
         _cleanup_fclose_ FILE *f = NULL;
         char line[LINE_MAX], *e;
-        int mount_id;
-        int r;
+        int mount_id, r;
 
-        r = name_to_handle_at_loop(AT_FDCWD, "/dev", NULL, &mount_id, 0);
+        r = path_get_mnt_id("/dev", &mount_id);
         if (r < 0) {
-                if (errno != EOPNOTSUPP)
-                        log_debug_errno(errno, "name_to_handle_at on /dev: %m");
+                if (r != -EOPNOTSUPP)
+                        log_debug_errno(r, "name_to_handle_at on /dev: %m");
+
                 return false;
         }
 
@@ -168,8 +167,7 @@ static void monitor_set_nl_address(struct udev_monitor *udev_monitor) {
                 udev_monitor->snl.nl.nl_pid = snl.nl.nl_pid;
 }
 
-struct udev_monitor *udev_monitor_new_from_netlink_fd(struct udev *udev, const char *name, int fd)
-{
+struct udev_monitor *udev_monitor_new_from_netlink_fd(struct udev *udev, const char *name, int fd) {
         struct udev_monitor *udev_monitor;
         unsigned int group;
 
@@ -253,8 +251,7 @@ struct udev_monitor *udev_monitor_new_from_netlink_fd(struct udev *udev, const c
  *
  * Returns: a new udev monitor, or #NULL, in case of an error
  **/
-_public_ struct udev_monitor *udev_monitor_new_from_netlink(struct udev *udev, const char *name)
-{
+_public_ struct udev_monitor *udev_monitor_new_from_netlink(struct udev *udev, const char *name) {
         return udev_monitor_new_from_netlink_fd(udev, name, -1);
 }
 

From 190654f44b9d141d458a866fe205d9727c36861a Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Tue, 21 Nov 2017 11:08:22 +0100
Subject: [PATCH 4/4] test: fix UDEV-WANTS testcase for non-bash shells

testsuite.sh uses "set -o pipefile", which is a bashism, hence use bash
to invoke the script.
---
 test/TEST-17-UDEV-WANTS/test.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/TEST-17-UDEV-WANTS/test.sh b/test/TEST-17-UDEV-WANTS/test.sh
index 7beef7f3984..24989ebcf6d 100755
--- a/test/TEST-17-UDEV-WANTS/test.sh
+++ b/test/TEST-17-UDEV-WANTS/test.sh
@@ -32,7 +32,7 @@ test_setup() {
 Description=Testsuite service
 
 [Service]
-ExecStart=/bin/sh -x /testsuite.sh
+ExecStart=/bin/bash -x /testsuite.sh
 Type=oneshot
 StandardOutput=tty
 StandardError=tty