1
1
mirror of https://github.com/systemd/systemd-stable.git synced 2024-12-28 07:21:32 +03:00

Merge pull request #21077 from poettering/mount-setattr

optimize remounting with mount_setattr() syscall
This commit is contained in:
Yu Watanabe 2021-10-25 23:15:10 +09:00 committed by GitHub
commit f8aa82e43f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 90 additions and 19 deletions

View File

@ -465,10 +465,18 @@ struct mount_attr;
#define MOUNT_ATTR_IDMAP 0x00100000
#endif
#ifndef MOUNT_ATTR_NOSYMFOLLOW
#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000
#endif
#ifndef AT_RECURSIVE
#define AT_RECURSIVE 0x8000
#endif
#ifndef MOUNT_ATTR_SIZE_VER0
#define MOUNT_ATTR_SIZE_VER0 32
#endif
static inline int missing_mount_setattr(
int dfd,
const char *path,

View File

@ -134,6 +134,31 @@ int umount_recursive(const char *prefix, int flags) {
return n;
}
#define MS_CONVERTIBLE_FLAGS (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_NOSYMFOLLOW)
static uint64_t ms_flags_to_mount_attr(unsigned long a) {
uint64_t f = 0;
if (FLAGS_SET(a, MS_RDONLY))
f |= MOUNT_ATTR_RDONLY;
if (FLAGS_SET(a, MS_NOSUID))
f |= MOUNT_ATTR_NOSUID;
if (FLAGS_SET(a, MS_NODEV))
f |= MOUNT_ATTR_NODEV;
if (FLAGS_SET(a, MS_NOEXEC))
f |= MOUNT_ATTR_NOEXEC;
if (FLAGS_SET(a, MS_NOSYMFOLLOW))
f |= MOUNT_ATTR_NOSYMFOLLOW;
return f;
}
static bool skip_mount_set_attr = false;
/* Use this function only if you do not have direct access to /proc/self/mountinfo but the caller can open it
* for you. This is the case when /proc is masked or not mounted. Otherwise, use bind_remount_recursive. */
int bind_remount_recursive_with_mountinfo(
@ -143,12 +168,44 @@ int bind_remount_recursive_with_mountinfo(
char **deny_list,
FILE *proc_self_mountinfo) {
_cleanup_fclose_ FILE *proc_self_mountinfo_opened = NULL;
_cleanup_set_free_ Set *done = NULL;
unsigned n_tries = 0;
int r;
assert(prefix);
assert(proc_self_mountinfo);
if ((flags_mask & ~MS_CONVERTIBLE_FLAGS) == 0 && strv_isempty(deny_list) && !skip_mount_set_attr) {
/* Let's take a shortcut for all the flags we know how to convert into mount_setattr() flags */
if (mount_setattr(AT_FDCWD, prefix, AT_SYMLINK_NOFOLLOW|AT_RECURSIVE,
&(struct mount_attr) {
.attr_set = ms_flags_to_mount_attr(new_flags & flags_mask),
.attr_clr = ms_flags_to_mount_attr(~new_flags & flags_mask),
}, MOUNT_ATTR_SIZE_VER0) < 0) {
log_debug_errno(errno, "mount_setattr() failed, falling back to classic remounting: %m");
/* We fall through to classic behaviour if not supported (i.e. kernel < 5.12). We
* also do this for all other kinds of errors since they are so many different, and
* mount_setattr() has no graceful mode where it continues despite seeing errors one
* some mounts, but we want that. Moreover mount_setattr() only works on the mount
* point inode itself, not a non-mount point inode, and we want to support arbitrary
* prefixes here. */
if (ERRNO_IS_NOT_SUPPORTED(errno)) /* if not supported, then don't bother at all anymore */
skip_mount_set_attr = true;
} else
return 0; /* Nice, this worked! */
}
if (!proc_self_mountinfo) {
r = fopen_unlocked("/proc/self/mountinfo", "re", &proc_self_mountinfo_opened);
if (r < 0)
return r;
proc_self_mountinfo = proc_self_mountinfo_opened;
}
/* Recursively remount a directory (and all its submounts) with desired flags (MS_READONLY,
* MS_NOSUID, MS_NOEXEC). If the directory is already mounted, we reuse the mount and simply mark it
@ -343,22 +400,6 @@ int bind_remount_recursive_with_mountinfo(
}
}
int bind_remount_recursive(
const char *prefix,
unsigned long new_flags,
unsigned long flags_mask,
char **deny_list) {
_cleanup_fclose_ FILE *proc_self_mountinfo = NULL;
int r;
r = fopen_unlocked("/proc/self/mountinfo", "re", &proc_self_mountinfo);
if (r < 0)
return r;
return bind_remount_recursive_with_mountinfo(prefix, new_flags, flags_mask, deny_list, proc_self_mountinfo);
}
int bind_remount_one_with_mountinfo(
const char *path,
unsigned long new_flags,
@ -374,6 +415,23 @@ int bind_remount_one_with_mountinfo(
assert(path);
assert(proc_self_mountinfo);
if ((flags_mask & ~MS_CONVERTIBLE_FLAGS) == 0 && !skip_mount_set_attr) {
/* Let's take a shortcut for all the flags we know how to convert into mount_setattr() flags */
if (mount_setattr(AT_FDCWD, path, AT_SYMLINK_NOFOLLOW,
&(struct mount_attr) {
.attr_set = ms_flags_to_mount_attr(new_flags & flags_mask),
.attr_clr = ms_flags_to_mount_attr(~new_flags & flags_mask),
}, MOUNT_ATTR_SIZE_VER0) < 0) {
log_debug_errno(errno, "mount_setattr() didn't work, falling back to classic remounting: %m");
if (ERRNO_IS_NOT_SUPPORTED(errno)) /* if not supported, then don't bother at all anymore */
skip_mount_set_attr = true;
} else
return 0; /* Nice, this worked! */
}
rewind(proc_self_mountinfo);
table = mnt_new_table();

View File

@ -40,8 +40,12 @@ int mount_nofollow(const char *source, const char *target, const char *filesyste
int repeat_unmount(const char *path, int flags);
int umount_recursive(const char *target, int flags);
int bind_remount_recursive(const char *prefix, unsigned long new_flags, unsigned long flags_mask, char **deny_list);
int bind_remount_recursive_with_mountinfo(const char *prefix, unsigned long new_flags, unsigned long flags_mask, char **deny_list, FILE *proc_self_mountinfo);
static inline int bind_remount_recursive(const char *prefix, unsigned long new_flags, unsigned long flags_mask, char **deny_list) {
return bind_remount_recursive_with_mountinfo(prefix, new_flags, flags_mask, deny_list, NULL);
}
int bind_remount_one_with_mountinfo(const char *path, unsigned long new_flags, unsigned long flags_mask, FILE *proc_self_mountinfo);
int mount_move_root(const char *path);

View File

@ -169,7 +169,7 @@ static void test_bind_remount_recursive(void) {
assert_se(!FLAGS_SET(svfs.f_flag, ST_RDONLY));
/* Now mark the path we currently run for read-only */
assert_se(bind_remount_recursive(p, MS_RDONLY, MS_RDONLY, STRV_MAKE("/sys/kernel")) >= 0);
assert_se(bind_remount_recursive(p, MS_RDONLY, MS_RDONLY, path_equal(p, "/sys") ? STRV_MAKE("/sys/kernel") : NULL) >= 0);
/* Ensure that this worked on the top-level */
assert_se(statvfs(p, &svfs) >= 0);
@ -209,6 +209,7 @@ static void test_bind_remount_one(void) {
assert_se(fopen_unlocked("/proc/self/mountinfo", "re", &proc_self_mountinfo) >= 0);
assert_se(bind_remount_one_with_mountinfo("/run", MS_RDONLY, MS_RDONLY, proc_self_mountinfo) >= 0);
assert_se(bind_remount_one_with_mountinfo("/run", MS_NOEXEC, MS_RDONLY|MS_NOEXEC, proc_self_mountinfo) >= 0);
assert_se(bind_remount_one_with_mountinfo("/proc/idontexist", MS_RDONLY, MS_RDONLY, proc_self_mountinfo) == -ENOENT);
assert_se(bind_remount_one_with_mountinfo("/proc/self", MS_RDONLY, MS_RDONLY, proc_self_mountinfo) == -EINVAL);
assert_se(bind_remount_one_with_mountinfo("/", MS_RDONLY, MS_RDONLY, proc_self_mountinfo) >= 0);