diff --git a/TODO b/TODO index 9bb0831468..fab110f8b1 100644 --- a/TODO +++ b/TODO @@ -19,9 +19,6 @@ Features: * nss-systemd: also synthesize shadow records for users/groups -* make use of the new statx mountid and rootmount fields in path_get_mnt_id() - and fd_is_mount_point() - * nspawn: move "incoming mount" directory to /run/host, move "inaccessible" nodes to /run/host, move notify socket (for sd_notify() between payload and container manager) diff --git a/src/basic/missing_stat.h b/src/basic/missing_stat.h index 5116206a2e..fc5ba25685 100644 --- a/src/basic/missing_stat.h +++ b/src/basic/missing_stat.h @@ -8,38 +8,47 @@ #include #endif -/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */ +/* The newest definition we are aware of (fa2fcf4f1df1559a0a4ee0f46915b496cc2ebf60; 5.8) */ +#define STATX_DEFINITION { \ + __u32 stx_mask; \ + __u32 stx_blksize; \ + __u64 stx_attributes; \ + __u32 stx_nlink; \ + __u32 stx_uid; \ + __u32 stx_gid; \ + __u16 stx_mode; \ + __u16 __spare0[1]; \ + __u64 stx_ino; \ + __u64 stx_size; \ + __u64 stx_blocks; \ + __u64 stx_attributes_mask; \ + struct statx_timestamp stx_atime; \ + struct statx_timestamp stx_btime; \ + struct statx_timestamp stx_ctime; \ + struct statx_timestamp stx_mtime; \ + __u32 stx_rdev_major; \ + __u32 stx_rdev_minor; \ + __u32 stx_dev_major; \ + __u32 stx_dev_minor; \ + __u64 stx_mnt_id; \ + __u64 __spare2; \ + __u64 __spare3[12]; \ +} + #if !HAVE_STRUCT_STATX struct statx_timestamp { __s64 tv_sec; __u32 tv_nsec; __s32 __reserved; }; -struct statx { - __u32 stx_mask; - __u32 stx_blksize; - __u64 stx_attributes; - __u32 stx_nlink; - __u32 stx_uid; - __u32 stx_gid; - __u16 stx_mode; - __u16 __spare0[1]; - __u64 stx_ino; - __u64 stx_size; - __u64 stx_blocks; - __u64 stx_attributes_mask; - struct statx_timestamp stx_atime; - struct statx_timestamp stx_btime; - struct statx_timestamp stx_ctime; - struct statx_timestamp stx_mtime; - __u32 stx_rdev_major; - __u32 stx_rdev_minor; - __u32 stx_dev_major; - __u32
stx_dev_minor; - __u64 __spare2[14]; -}; + +struct statx STATX_DEFINITION; #endif +/* Always define the newest version we are aware of as a distinct type, so that we can use it even if glibc + * defines an older definition */ +struct new_statx STATX_DEFINITION; + /* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */ #ifndef STATX_BTIME #define STATX_BTIME 0x00000800U @@ -49,3 +58,13 @@ struct statx { #ifndef AT_STATX_DONT_SYNC #define AT_STATX_DONT_SYNC 0x4000 #endif + +/* fa2fcf4f1df1559a0a4ee0f46915b496cc2ebf60 (5.8) */ +#ifndef STATX_MNT_ID +#define STATX_MNT_ID 0x00001000U +#endif + +/* 80340fe3605c0e78cfe496c3b3878be828cfdbfe (5.8) */ +#ifndef STATX_ATTR_MOUNT_ROOT +#define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */ +#endif diff --git a/src/basic/missing_syscall.h b/src/basic/missing_syscall.h index 501c4d811c..b633ea4541 100644 --- a/src/basic/missing_syscall.h +++ b/src/basic/missing_syscall.h @@ -482,7 +482,7 @@ static inline ssize_t missing_statx(int dfd, const char *filename, unsigned flag # endif } -# define statx missing_statx +# define statx(dfd, filename, flags, mask, buffer) missing_statx(dfd, filename, flags, mask, buffer) #endif #if !HAVE_SET_MEMPOLICY diff --git a/src/basic/mountpoint-util.c b/src/basic/mountpoint-util.c index df1f0ac34c..87cb5558f4 100644 --- a/src/basic/mountpoint-util.c +++ b/src/basic/mountpoint-util.c @@ -8,6 +8,8 @@ #include "fd-util.h" #include "fileio.h" #include "fs-util.h" +#include "missing_stat.h" +#include "missing_syscall.h" #include "mountpoint-util.h" #include "parse-util.h" #include "path-util.h" @@ -32,6 +34,8 @@ int name_to_handle_at_loop( _cleanup_free_ struct file_handle *h = NULL; size_t n = ORIGINAL_MAX_HANDLE_SZ; + assert((flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH)) == 0); + /* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified * buffer is too small. 
Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a * start value, it is not an upper bound on the buffer size required. @@ -86,13 +90,16 @@ int name_to_handle_at_loop( } } -static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) { +static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *ret_mnt_id) { char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)]; _cleanup_free_ char *fdinfo = NULL; _cleanup_close_ int subfd = -1; char *p; int r; + assert(ret_mnt_id); + assert((flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH)) == 0); + if ((flags & AT_EMPTY_PATH) && isempty(filename)) xsprintf(path, "/proc/self/fdinfo/%i", fd); else { @@ -121,7 +128,7 @@ static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id p += strspn(p, WHITESPACE); p[strcspn(p, WHITESPACE)] = 0; - return safe_atoi(p, mnt_id); + return safe_atoi(p, ret_mnt_id); } int fd_is_mount_point(int fd, const char *filename, int flags) { @@ -129,33 +136,46 @@ int fd_is_mount_point(int fd, const char *filename, int flags) { int mount_id = -1, mount_id_parent = -1; bool nosupp = false, check_st_dev = true; struct stat a, b; + struct statx sx +#if HAS_FEATURE_MEMORY_SANITIZER + = {} +# warning "Explicitly initializing struct statx, to work around msan limitation. Please remove as soon as msan has been updated to not require this." +#endif + ; int r; assert(fd >= 0); assert(filename); + assert((flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH)) == 0); - /* First we will try the name_to_handle_at() syscall, which - * tells us the mount id and an opaque file "handle". It is - * not supported everywhere though (kernel compile-time - * option, not all file systems are hooked up). If it works - * the mount id is usually good enough to tell us whether - * something is a mount point. + /* First we will try statx()'s STATX_ATTR_MOUNT_ROOT attribute, which is our ideal API, available + * since kernel 5.8.
* - * If that didn't work we will try to read the mount id from - * /proc/self/fdinfo/. This is almost as good as - * name_to_handle_at(), however, does not return the - * opaque file handle. The opaque file handle is pretty useful - * to detect the root directory, which we should always - * consider a mount point. Hence we use this only as - * fallback. Exporting the mnt_id in fdinfo is a pretty recent + * If that fails, our second try is the name_to_handle_at() syscall, which tells us the mount id and + * an opaque file "handle". It is not supported everywhere though (kernel compile-time option, not + * all file systems are hooked up). If it works the mount id is usually good enough to tell us + * whether something is a mount point. + * + * If that didn't work we will try to read the mount id from /proc/self/fdinfo/. This is almost + * as good as name_to_handle_at(), however, does not return the opaque file handle. The opaque file + * handle is pretty useful to detect the root directory, which we should always consider a mount + * point. Hence we use this only as fallback. Exporting the mnt_id in fdinfo is a pretty recent * kernel addition. * - * As last fallback we do traditional fstat() based st_dev - * comparisons. This is how things were traditionally done, - * but unionfs breaks this since it exposes file - * systems with a variety of st_dev reported. Also, btrfs - * subvolumes have different st_dev, even though they aren't - * real mounts of their own. */ + * As last fallback we do traditional fstat() based st_dev comparisons. This is how things were + * traditionally done, but unionfs breaks this since it exposes file systems with a variety of st_dev + * reported. Also, btrfs subvolumes have different st_dev, even though they aren't real mounts of + * their own. */ + + if (statx(fd, filename, (FLAGS_SET(flags, AT_SYMLINK_FOLLOW) ? 
0 : AT_SYMLINK_NOFOLLOW) | + (flags & AT_EMPTY_PATH) | + AT_NO_AUTOMOUNT, 0, &sx) < 0) { + if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno)) + return -errno; + + /* If statx() is not available or forbidden, fall back to name_to_handle_at() below */ + } else if (FLAGS_SET(sx.stx_attributes_mask, STATX_ATTR_MOUNT_ROOT)) /* yay! */ + return FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT); r = name_to_handle_at_loop(fd, filename, &h, &mount_id, flags); if (IN_SET(r, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL)) @@ -278,8 +298,29 @@ int path_is_mount_point(const char *t, const char *root, int flags) { } int path_get_mnt_id(const char *path, int *ret) { + union { + struct statx sx; + struct new_statx nsx; + } buf +#if HAS_FEATURE_MEMORY_SANITIZER + = {} +# warning "Explicitly initializing struct statx, to work around msan limitation. Please remove as soon as msan has been updated to not require this." +#endif + ; int r; + if (statx(AT_FDCWD, path, AT_SYMLINK_NOFOLLOW|AT_NO_AUTOMOUNT, STATX_MNT_ID, &buf.sx) < 0) { + if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno)) + return -errno; + + /* Fall back to name_to_handle_at() and then fdinfo if statx is not supported or we lack + * privileges */ + + } else if (FLAGS_SET(buf.nsx.stx_mask, STATX_MNT_ID)) { + *ret = buf.nsx.stx_mnt_id; + return 0; + } + r = name_to_handle_at_loop(AT_FDCWD, path, NULL, ret, 0); if (IN_SET(r, -EOPNOTSUPP, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL)) /* kernel/fs don't support this, or seccomp blocks access, or untriggered mount, or name_to_handle_at() is flaky */ return fd_fdinfo_mnt_id(AT_FDCWD, path, 0, ret);