mirror of
https://github.com/systemd/systemd-stable.git
synced 2025-02-26 09:57:26 +03:00
Merge pull request #16677 from poettering/statx-mntid
make use of new kernel 5.8 statx() mount id/mountpoint APIs
This commit is contained in:
commit
0cd9ccb654
3
TODO
3
TODO
@ -19,9 +19,6 @@ Features:
|
||||
|
||||
* nss-systemd: also synthesize shadow records for users/groups
|
||||
|
||||
* make use of the new statx mountid and rootmount fields in path_get_mnt_id()
|
||||
and fd_is_mount_point()
|
||||
|
||||
* nspawn: move "incoming mount" directory to /run/host, move "inaccessible"
|
||||
nodes to /run/host, move notify socket (for sd_notify() between payload and
|
||||
container manager)
|
||||
|
@ -8,38 +8,47 @@
|
||||
#include <linux/stat.h>
|
||||
#endif
|
||||
|
||||
/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
|
||||
/* The newest definition we are aware of (fa2fcf4f1df1559a0a4ee0f46915b496cc2ebf60; 5.8) */
|
||||
#define STATX_DEFINITION { \
|
||||
__u32 stx_mask; \
|
||||
__u32 stx_blksize; \
|
||||
__u64 stx_attributes; \
|
||||
__u32 stx_nlink; \
|
||||
__u32 stx_uid; \
|
||||
__u32 stx_gid; \
|
||||
__u16 stx_mode; \
|
||||
__u16 __spare0[1]; \
|
||||
__u64 stx_ino; \
|
||||
__u64 stx_size; \
|
||||
__u64 stx_blocks; \
|
||||
__u64 stx_attributes_mask; \
|
||||
struct statx_timestamp stx_atime; \
|
||||
struct statx_timestamp stx_btime; \
|
||||
struct statx_timestamp stx_ctime; \
|
||||
struct statx_timestamp stx_mtime; \
|
||||
__u32 stx_rdev_major; \
|
||||
__u32 stx_rdev_minor; \
|
||||
__u32 stx_dev_major; \
|
||||
__u32 stx_dev_minor; \
|
||||
__u64 stx_mnt_id; \
|
||||
__u64 __spare2; \
|
||||
__u64 __spare3[12]; \
|
||||
}
|
||||
|
||||
#if !HAVE_STRUCT_STATX
|
||||
struct statx_timestamp {
|
||||
__s64 tv_sec;
|
||||
__u32 tv_nsec;
|
||||
__s32 __reserved;
|
||||
};
|
||||
struct statx {
|
||||
__u32 stx_mask;
|
||||
__u32 stx_blksize;
|
||||
__u64 stx_attributes;
|
||||
__u32 stx_nlink;
|
||||
__u32 stx_uid;
|
||||
__u32 stx_gid;
|
||||
__u16 stx_mode;
|
||||
__u16 __spare0[1];
|
||||
__u64 stx_ino;
|
||||
__u64 stx_size;
|
||||
__u64 stx_blocks;
|
||||
__u64 stx_attributes_mask;
|
||||
struct statx_timestamp stx_atime;
|
||||
struct statx_timestamp stx_btime;
|
||||
struct statx_timestamp stx_ctime;
|
||||
struct statx_timestamp stx_mtime;
|
||||
__u32 stx_rdev_major;
|
||||
__u32 stx_rdev_minor;
|
||||
__u32 stx_dev_major;
|
||||
__u32 stx_dev_minor;
|
||||
__u64 __spare2[14];
|
||||
};
|
||||
|
||||
struct statx STATX_DEFINITION;
|
||||
#endif
|
||||
|
||||
/* Always define the newest version we are aware of as a distinct type, so that we can use it even if glibc
|
||||
* defines an older definition */
|
||||
struct new_statx STATX_DEFINITION;
|
||||
|
||||
/* a528d35e8bfcc521d7cb70aaf03e1bd296c8493f (4.11) */
|
||||
#ifndef STATX_BTIME
|
||||
#define STATX_BTIME 0x00000800U
|
||||
@ -49,3 +58,13 @@ struct statx {
|
||||
#ifndef AT_STATX_DONT_SYNC
|
||||
#define AT_STATX_DONT_SYNC 0x4000
|
||||
#endif
|
||||
|
||||
/* fa2fcf4f1df1559a0a4ee0f46915b496cc2ebf60 (5.8) */
|
||||
#ifndef STATX_MNT_ID
|
||||
#define STATX_MNT_ID 0x00001000U
|
||||
#endif
|
||||
|
||||
/* 80340fe3605c0e78cfe496c3b3878be828cfdbfe (5.8) */
|
||||
#ifndef STATX_ATTR_MOUNT_ROOT
|
||||
#define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */
|
||||
#endif
|
||||
|
@ -482,7 +482,7 @@ static inline ssize_t missing_statx(int dfd, const char *filename, unsigned flag
|
||||
# endif
|
||||
}
|
||||
|
||||
# define statx missing_statx
|
||||
# define statx(dfd, filename, flags, mask, buffer) missing_statx(dfd, filename, flags, mask, buffer)
|
||||
#endif
|
||||
|
||||
#if !HAVE_SET_MEMPOLICY
|
||||
|
@ -8,6 +8,8 @@
|
||||
#include "fd-util.h"
|
||||
#include "fileio.h"
|
||||
#include "fs-util.h"
|
||||
#include "missing_stat.h"
|
||||
#include "missing_syscall.h"
|
||||
#include "mountpoint-util.h"
|
||||
#include "parse-util.h"
|
||||
#include "path-util.h"
|
||||
@ -32,6 +34,8 @@ int name_to_handle_at_loop(
|
||||
_cleanup_free_ struct file_handle *h = NULL;
|
||||
size_t n = ORIGINAL_MAX_HANDLE_SZ;
|
||||
|
||||
assert((flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH)) == 0);
|
||||
|
||||
/* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified
|
||||
* buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a
|
||||
* start value, it is not an upper bound on the buffer size required.
|
||||
@ -86,13 +90,16 @@ int name_to_handle_at_loop(
|
||||
}
|
||||
}
|
||||
|
||||
static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id) {
|
||||
static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *ret_mnt_id) {
|
||||
char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
|
||||
_cleanup_free_ char *fdinfo = NULL;
|
||||
_cleanup_close_ int subfd = -1;
|
||||
char *p;
|
||||
int r;
|
||||
|
||||
assert(ret_mnt_id);
|
||||
assert((flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH)) == 0);
|
||||
|
||||
if ((flags & AT_EMPTY_PATH) && isempty(filename))
|
||||
xsprintf(path, "/proc/self/fdinfo/%i", fd);
|
||||
else {
|
||||
@ -121,7 +128,7 @@ static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *mnt_id
|
||||
p += strspn(p, WHITESPACE);
|
||||
p[strcspn(p, WHITESPACE)] = 0;
|
||||
|
||||
return safe_atoi(p, mnt_id);
|
||||
return safe_atoi(p, ret_mnt_id);
|
||||
}
|
||||
|
||||
int fd_is_mount_point(int fd, const char *filename, int flags) {
|
||||
@ -129,33 +136,46 @@ int fd_is_mount_point(int fd, const char *filename, int flags) {
|
||||
int mount_id = -1, mount_id_parent = -1;
|
||||
bool nosupp = false, check_st_dev = true;
|
||||
struct stat a, b;
|
||||
struct statx sx
|
||||
#if HAS_FEATURE_MEMORY_SANITIZER
|
||||
= {}
|
||||
# warning "Explicitly initializing struct statx, to work around msan limitation. Please remove as soon as msan has been updated to not require this."
|
||||
#endif
|
||||
;
|
||||
int r;
|
||||
|
||||
assert(fd >= 0);
|
||||
assert(filename);
|
||||
assert((flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH)) == 0);
|
||||
|
||||
/* First we will try the name_to_handle_at() syscall, which
|
||||
* tells us the mount id and an opaque file "handle". It is
|
||||
* not supported everywhere though (kernel compile-time
|
||||
* option, not all file systems are hooked up). If it works
|
||||
* the mount id is usually good enough to tell us whether
|
||||
* something is a mount point.
|
||||
/* First we will try statx()'s STATX_ATTR_MOUNT_ROOT attribute, which is our ideal API, available
|
||||
* since kernel 5.8.
|
||||
*
|
||||
* If that didn't work we will try to read the mount id from
|
||||
* /proc/self/fdinfo/<fd>. This is almost as good as
|
||||
* name_to_handle_at(), however, does not return the
|
||||
* opaque file handle. The opaque file handle is pretty useful
|
||||
* to detect the root directory, which we should always
|
||||
* consider a mount point. Hence we use this only as
|
||||
* fallback. Exporting the mnt_id in fdinfo is a pretty recent
|
||||
* If that fails, our second try is the name_to_handle_at() syscall, which tells us the mount id and
|
||||
* an opaque file "handle". It is not supported everywhere though (kernel compile-time option, not
|
||||
* all file systems are hooked up). If it works the mount id is usually good enough to tell us
|
||||
* whether something is a mount point.
|
||||
*
|
||||
* If that didn't work we will try to read the mount id from /proc/self/fdinfo/<fd>. This is almost
|
||||
* as good as name_to_handle_at(), however, it does not return the opaque file handle. The opaque file
|
||||
* handle is pretty useful to detect the root directory, which we should always consider a mount
|
||||
* point. Hence we use this only as fallback. Exporting the mnt_id in fdinfo is a pretty recent
|
||||
* kernel addition.
|
||||
*
|
||||
* As last fallback we do traditional fstat() based st_dev
|
||||
* comparisons. This is how things were traditionally done,
|
||||
* but unionfs breaks this since it exposes file
|
||||
* systems with a variety of st_dev reported. Also, btrfs
|
||||
* subvolumes have different st_dev, even though they aren't
|
||||
* real mounts of their own. */
|
||||
* As last fallback we do traditional fstat() based st_dev comparisons. This is how things were
|
||||
* traditionally done, but unionfs breaks this since it exposes file systems with a variety of st_dev
|
||||
* reported. Also, btrfs subvolumes have different st_dev, even though they aren't real mounts of
|
||||
* their own. */
|
||||
|
||||
if (statx(fd, filename, (FLAGS_SET(flags, AT_SYMLINK_FOLLOW) ? 0 : AT_SYMLINK_NOFOLLOW) |
|
||||
(flags & AT_EMPTY_PATH) |
|
||||
AT_NO_AUTOMOUNT, 0, &sx) < 0) {
|
||||
if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
|
||||
return -errno;
|
||||
|
||||
/* If statx() is not available or forbidden, fallback to name_to_handle_at() below */
|
||||
} else if (FLAGS_SET(sx.stx_attributes_mask, STATX_ATTR_MOUNT_ROOT)) /* yay! */
|
||||
return FLAGS_SET(sx.stx_attributes, STATX_ATTR_MOUNT_ROOT);
|
||||
|
||||
r = name_to_handle_at_loop(fd, filename, &h, &mount_id, flags);
|
||||
if (IN_SET(r, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL))
|
||||
@ -278,8 +298,29 @@ int path_is_mount_point(const char *t, const char *root, int flags) {
|
||||
}
|
||||
|
||||
int path_get_mnt_id(const char *path, int *ret) {
|
||||
union {
|
||||
struct statx sx;
|
||||
struct new_statx nsx;
|
||||
} buf
|
||||
#if HAS_FEATURE_MEMORY_SANITIZER
|
||||
= {}
|
||||
# warning "Explicitly initializing struct statx, to work around msan limitation. Please remove as soon as msan has been updated to not require this."
|
||||
#endif
|
||||
;
|
||||
int r;
|
||||
|
||||
if (statx(AT_FDCWD, path, AT_SYMLINK_NOFOLLOW|AT_NO_AUTOMOUNT, STATX_MNT_ID, &buf.sx) < 0) {
|
||||
if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
|
||||
return -errno;
|
||||
|
||||
/* Fall back to name_to_handle_at() and then fdinfo if statx is not supported or we lack
|
||||
* privileges */
|
||||
|
||||
} else if (FLAGS_SET(buf.nsx.stx_mask, STATX_MNT_ID)) {
|
||||
*ret = buf.nsx.stx_mnt_id;
|
||||
return 0;
|
||||
}
|
||||
|
||||
r = name_to_handle_at_loop(AT_FDCWD, path, NULL, ret, 0);
|
||||
if (IN_SET(r, -EOPNOTSUPP, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL)) /* kernel/fs don't support this, or seccomp blocks access, or untriggered mount, or name_to_handle_at() is flaky */
|
||||
return fd_fdinfo_mnt_id(AT_FDCWD, path, 0, ret);
|
||||
|
Loading…
x
Reference in New Issue
Block a user