mirror of
https://github.com/systemd/systemd.git
synced 2025-01-05 13:18:06 +03:00
Merge pull request #33475 from poettering/name-to-handle-at-fid
teach inode_same() the concept of name_to_handle_at() FIDs to properly detect inode identities
This commit is contained in:
commit
1c30bf35bc
@ -92,3 +92,7 @@
|
||||
#define RAW_O_LARGEFILE 00100000
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef AT_HANDLE_FID
|
||||
#define AT_HANDLE_FID AT_REMOVEDIR
|
||||
#endif
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include "fileio.h"
|
||||
#include "filesystems.h"
|
||||
#include "fs-util.h"
|
||||
#include "missing_fcntl.h"
|
||||
#include "missing_fs.h"
|
||||
#include "missing_mount.h"
|
||||
#include "missing_stat.h"
|
||||
@ -35,6 +36,24 @@
|
||||
* with large file handles anyway. */
|
||||
#define ORIGINAL_MAX_HANDLE_SZ 128
|
||||
|
||||
bool is_name_to_handle_at_fatal_error(int err) {
        /* Classifies a negative error code returned by name_to_handle_at(). Returns false for errors
         * that are "acceptable" because they merely reflect the context we are running in, and true for
         * everything else. The acceptable ones are:
         *
         *   • "not supported" errors: the kernel does not support name_to_handle_at() at all (ENOSYS)
         *   • privilege errors (EACCES/EPERM): the syscall was blocked, maybe through seccomp, because
         *     we are running inside of a container
         *   • EOVERFLOW: the mount point is not triggered yet (think autofs+nfs4)
         *   • EINVAL: some general name_to_handle_at() flakiness
         *
         * Other errors are not supposed to happen and are therefore considered fatal. */

        assert(err < 0);

        if (ERRNO_IS_NEG_NOT_SUPPORTED(err) || ERRNO_IS_NEG_PRIVILEGE(err))
                return false;

        switch (err) {

        case -EOVERFLOW:
        case -EINVAL:
                return false;

        default:
                return true;
        }
}
|
||||
|
||||
int name_to_handle_at_loop(
|
||||
int fd,
|
||||
const char *path,
|
||||
@ -44,7 +63,8 @@ int name_to_handle_at_loop(
|
||||
|
||||
size_t n = ORIGINAL_MAX_HANDLE_SZ;
|
||||
|
||||
assert((flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH)) == 0);
|
||||
assert(fd >= 0 || fd == AT_FDCWD);
|
||||
assert((flags & ~(AT_SYMLINK_FOLLOW|AT_EMPTY_PATH|AT_HANDLE_FID)) == 0);
|
||||
|
||||
/* We need to invoke name_to_handle_at() in a loop, given that it might return EOVERFLOW when the specified
|
||||
* buffer is too small. Note that in contrast to what the docs might suggest, MAX_HANDLE_SZ is only good as a
|
||||
@ -86,9 +106,9 @@ int name_to_handle_at_loop(
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by something
|
||||
* else (apparently EOVERFLOW is returned for untriggered nfs4 mounts sometimes), not by the too small
|
||||
* buffer. In that case propagate EOVERFLOW */
|
||||
/* If name_to_handle_at() didn't increase the byte size, then this EOVERFLOW is caused by
|
||||
* something else (apparently EOVERFLOW is returned for untriggered nfs4 autofs mounts
|
||||
* sometimes), not by the too small buffer. In that case propagate EOVERFLOW */
|
||||
if (h->handle_bytes <= n)
|
||||
return -EOVERFLOW;
|
||||
|
||||
@ -101,6 +121,30 @@ int name_to_handle_at_loop(
|
||||
}
|
||||
}
|
||||
|
||||
int name_to_handle_at_try_fid(
|
||||
int fd,
|
||||
const char *path,
|
||||
struct file_handle **ret_handle,
|
||||
int *ret_mnt_id,
|
||||
int flags) {
|
||||
|
||||
int r;
|
||||
|
||||
assert(fd >= 0 || fd == AT_FDCWD);
|
||||
|
||||
/* First issues name_to_handle_at() with AT_HANDLE_FID. If this fails and this is not a fatal error
|
||||
* we'll try without the flag, in order to support older kernels that didn't have AT_HANDLE_FID
|
||||
* (i.e. older than Linux 6.5). */
|
||||
|
||||
r = name_to_handle_at_loop(fd, path, ret_handle, ret_mnt_id, flags | AT_HANDLE_FID);
|
||||
if (r >= 0)
|
||||
return r;
|
||||
if (is_name_to_handle_at_fatal_error(r))
|
||||
return r;
|
||||
|
||||
return name_to_handle_at_loop(fd, path, ret_handle, ret_mnt_id, flags & ~AT_HANDLE_FID);
|
||||
}
|
||||
|
||||
static int fd_fdinfo_mnt_id(int fd, const char *filename, int flags, int *ret_mnt_id) {
|
||||
char path[STRLEN("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
|
||||
_cleanup_free_ char *fdinfo = NULL;
|
||||
@ -160,17 +204,15 @@ static bool filename_possibly_with_slash_suffix(const char *s) {
|
||||
return filename_is_valid(copied);
|
||||
}
|
||||
|
||||
static bool is_name_to_handle_at_fatal_error(int err) {
|
||||
/* name_to_handle_at() can return "acceptable" errors that are due to the context. For
|
||||
* example the kernel does not support name_to_handle_at() at all (ENOSYS), or the syscall
|
||||
* was blocked (EACCES/EPERM; maybe through seccomp, because we are running inside of a
|
||||
* container), or the mount point is not triggered yet (EOVERFLOW, think nfs4), or some
|
||||
* general name_to_handle_at() flakiness (EINVAL). However other errors are not supposed to
|
||||
* happen and therefore are considered fatal ones. */
|
||||
bool file_handle_equal(const struct file_handle *a, const struct file_handle *b) {
|
||||
if (a == b)
|
||||
return true;
|
||||
if (!a != !b)
|
||||
return false;
|
||||
if (a->handle_type != b->handle_type)
|
||||
return false;
|
||||
|
||||
assert(err < 0);
|
||||
|
||||
return !IN_SET(err, -EOPNOTSUPP, -ENOSYS, -EACCES, -EPERM, -EOVERFLOW, -EINVAL);
|
||||
return memcmp_nn(a->f_handle, a->handle_bytes, b->f_handle, b->handle_bytes) == 0;
|
||||
}
|
||||
|
||||
int fd_is_mount_point(int fd, const char *filename, int flags) {
|
||||
@ -234,11 +276,11 @@ int fd_is_mount_point(int fd, const char *filename, int flags) {
|
||||
else if (FLAGS_SET(sx.stx_mask, STATX_TYPE) && S_ISLNK(sx.stx_mode))
|
||||
return false; /* symlinks are never mount points */
|
||||
|
||||
r = name_to_handle_at_loop(fd, filename, &h, &mount_id, flags);
|
||||
r = name_to_handle_at_try_fid(fd, filename, &h, &mount_id, flags);
|
||||
if (r < 0) {
|
||||
if (is_name_to_handle_at_fatal_error(r))
|
||||
return r;
|
||||
if (r != -EOPNOTSUPP)
|
||||
if (!ERRNO_IS_NOT_SUPPORTED(r))
|
||||
goto fallback_fdinfo;
|
||||
|
||||
/* This kernel or file system does not support name_to_handle_at(), hence let's see
|
||||
@ -248,13 +290,13 @@ int fd_is_mount_point(int fd, const char *filename, int flags) {
|
||||
}
|
||||
|
||||
if (isempty(filename))
|
||||
r = name_to_handle_at_loop(fd, "..", &h_parent, &mount_id_parent, 0); /* can't work for non-directories 😢 */
|
||||
r = name_to_handle_at_try_fid(fd, "..", &h_parent, &mount_id_parent, 0); /* can't work for non-directories 😢 */
|
||||
else
|
||||
r = name_to_handle_at_loop(fd, "", &h_parent, &mount_id_parent, AT_EMPTY_PATH);
|
||||
r = name_to_handle_at_try_fid(fd, "", &h_parent, &mount_id_parent, AT_EMPTY_PATH);
|
||||
if (r < 0) {
|
||||
if (is_name_to_handle_at_fatal_error(r))
|
||||
return r;
|
||||
if (r != -EOPNOTSUPP)
|
||||
if (!ERRNO_IS_NOT_SUPPORTED(r))
|
||||
goto fallback_fdinfo;
|
||||
if (nosupp)
|
||||
/* Both the parent and the directory can't do name_to_handle_at() */
|
||||
@ -272,17 +314,14 @@ int fd_is_mount_point(int fd, const char *filename, int flags) {
|
||||
|
||||
/* If the file handle for the directory we are interested in and its parent are identical,
|
||||
* we assume this is the root directory, which is a mount point. */
|
||||
|
||||
if (h->handle_type == h_parent->handle_type &&
|
||||
memcmp_nn(h->f_handle, h->handle_bytes,
|
||||
h_parent->f_handle, h_parent->handle_bytes) == 0)
|
||||
if (file_handle_equal(h_parent, h))
|
||||
return 1;
|
||||
|
||||
return mount_id != mount_id_parent;
|
||||
|
||||
fallback_fdinfo:
|
||||
r = fd_fdinfo_mnt_id(fd, filename, flags, &mount_id);
|
||||
if (IN_SET(r, -EOPNOTSUPP, -EACCES, -EPERM, -ENOSYS))
|
||||
if (ERRNO_IS_NEG_NOT_SUPPORTED(r) || ERRNO_IS_NEG_PRIVILEGE(r))
|
||||
goto fallback_fstat;
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
@ -36,7 +36,12 @@
|
||||
#define TMPFS_LIMITS_ROOTFS TMPFS_LIMITS_VAR
|
||||
#define TMPFS_LIMITS_VOLATILE_STATE TMPFS_LIMITS_VAR
|
||||
|
||||
bool is_name_to_handle_at_fatal_error(int err);
|
||||
|
||||
int name_to_handle_at_loop(int fd, const char *path, struct file_handle **ret_handle, int *ret_mnt_id, int flags);
|
||||
int name_to_handle_at_try_fid(int fd, const char *path, struct file_handle **ret_handle, int *ret_mnt_id, int flags);
|
||||
|
||||
bool file_handle_equal(const struct file_handle *a, const struct file_handle *b);
|
||||
|
||||
int path_get_mnt_id_at_fallback(int dir_fd, const char *path, int *ret);
|
||||
int path_get_mnt_id_at(int dir_fd, const char *path, int *ret);
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "missing_fs.h"
|
||||
#include "missing_magic.h"
|
||||
#include "missing_syscall.h"
|
||||
#include "mountpoint-util.h"
|
||||
#include "nulstr-util.h"
|
||||
#include "parse-util.h"
|
||||
#include "stat-util.h"
|
||||
@ -271,18 +272,103 @@ int path_is_read_only_fs(const char *path) {
|
||||
}
|
||||
|
||||
int inode_same_at(int fda, const char *filea, int fdb, const char *fileb, int flags) {
|
||||
struct stat a, b;
|
||||
struct stat sta, stb;
|
||||
int r;
|
||||
|
||||
assert(fda >= 0 || fda == AT_FDCWD);
|
||||
assert(fdb >= 0 || fdb == AT_FDCWD);
|
||||
assert((flags & ~(AT_EMPTY_PATH|AT_SYMLINK_NOFOLLOW|AT_NO_AUTOMOUNT)) == 0);
|
||||
|
||||
if (fstatat(fda, strempty(filea), &a, flags) < 0)
|
||||
return log_debug_errno(errno, "Cannot stat %s: %m", filea);
|
||||
/* Refuse an unset filea or fileb early unless AT_EMPTY_PATH is set */
|
||||
if ((isempty(filea) || isempty(fileb)) && !FLAGS_SET(flags, AT_EMPTY_PATH))
|
||||
return -EINVAL;
|
||||
|
||||
if (fstatat(fdb, strempty(fileb), &b, flags) < 0)
|
||||
return log_debug_errno(errno, "Cannot stat %s: %m", fileb);
|
||||
/* Shortcut: comparing the same fd with itself means we can return true */
|
||||
if (fda >= 0 && fda == fdb && isempty(filea) && isempty(fileb) && FLAGS_SET(flags, AT_SYMLINK_NOFOLLOW))
|
||||
return true;
|
||||
|
||||
return stat_inode_same(&a, &b);
|
||||
_cleanup_close_ int pin_a = -EBADF, pin_b = -EBADF;
|
||||
if (!FLAGS_SET(flags, AT_NO_AUTOMOUNT)) {
|
||||
/* Let's try to use the name_to_handle_at() AT_HANDLE_FID API to identify identical
|
||||
* inodes. We have to issue multiple calls on the same file for that (first, to acquire the
|
||||
* FID, and then to check if .st_dev is actually the same). Hence let's pin the inode in
|
||||
* between via O_PATH, unless we already have an fd for it. */
|
||||
|
||||
if (!isempty(filea)) {
|
||||
pin_a = openat(fda, filea, O_PATH|O_CLOEXEC|(FLAGS_SET(flags, AT_SYMLINK_NOFOLLOW) ? O_NOFOLLOW : 0));
|
||||
if (pin_a < 0)
|
||||
return -errno;
|
||||
|
||||
fda = pin_a;
|
||||
filea = NULL;
|
||||
flags |= AT_EMPTY_PATH;
|
||||
}
|
||||
|
||||
if (!isempty(fileb)) {
|
||||
pin_b = openat(fdb, fileb, O_PATH|O_CLOEXEC|(FLAGS_SET(flags, AT_SYMLINK_NOFOLLOW) ? O_NOFOLLOW : 0));
|
||||
if (pin_b < 0)
|
||||
return -errno;
|
||||
|
||||
fdb = pin_b;
|
||||
fileb = NULL;
|
||||
flags |= AT_EMPTY_PATH;
|
||||
}
|
||||
|
||||
int ntha_flags = (flags & AT_EMPTY_PATH) | (FLAGS_SET(flags, AT_SYMLINK_NOFOLLOW) ? 0 : AT_SYMLINK_FOLLOW);
|
||||
_cleanup_free_ struct file_handle *ha = NULL, *hb = NULL;
|
||||
int mntida = -1, mntidb = -1;
|
||||
|
||||
r = name_to_handle_at_try_fid(
|
||||
fda,
|
||||
filea,
|
||||
&ha,
|
||||
&mntida,
|
||||
ntha_flags);
|
||||
if (r < 0) {
|
||||
if (is_name_to_handle_at_fatal_error(r))
|
||||
return r;
|
||||
|
||||
goto fallback;
|
||||
}
|
||||
|
||||
r = name_to_handle_at_try_fid(
|
||||
fdb,
|
||||
fileb,
|
||||
&hb,
|
||||
&mntidb,
|
||||
ntha_flags);
|
||||
if (r < 0) {
|
||||
if (is_name_to_handle_at_fatal_error(r))
|
||||
return r;
|
||||
|
||||
goto fallback;
|
||||
}
|
||||
|
||||
/* Now compare the two file handles */
|
||||
if (!file_handle_equal(ha, hb))
|
||||
return false;
|
||||
|
||||
/* If the file handles are the same and they come from the same mount ID? Great, then we are
|
||||
* good, they are definitely the same */
|
||||
if (mntida == mntidb)
|
||||
return true;
|
||||
|
||||
/* File handles are the same, they are not on the same mount id. This might either be because
|
||||
* they are on two entirely different file systems, that just happen to have the same FIDs
|
||||
* (because they originally were created off the same disk images), or it could be because
|
||||
* they are located on two distinct bind mounts of the same fs. To check that, let's look at
|
||||
* .st_dev of the inode. We simply reuse the fallback codepath for that, since it checks
|
||||
* exactly that (it checks slightly more, but we don't care.) */
|
||||
}
|
||||
|
||||
fallback:
|
||||
if (fstatat(fda, strempty(filea), &sta, flags) < 0)
|
||||
return log_debug_errno(errno, "Cannot stat %s: %m", strna(filea));
|
||||
|
||||
if (fstatat(fdb, strempty(fileb), &stb, flags) < 0)
|
||||
return log_debug_errno(errno, "Cannot stat %s: %m", strna(fileb));
|
||||
|
||||
return stat_inode_same(&sta, &stb);
|
||||
}
|
||||
|
||||
bool is_fs_type(const struct statfs *s, statfs_f_type_t magic_value) {
|
||||
|
@ -1815,3 +1815,13 @@ int make_fsmount(
|
||||
|
||||
return TAKE_FD(mnt_fd);
|
||||
}
|
||||
|
||||
char* umount_and_unlink_and_free(char *p) {
|
||||
if (!p)
|
||||
return NULL;
|
||||
|
||||
PROTECT_ERRNO;
|
||||
(void) umount2(p, 0);
|
||||
(void) unlink(p);
|
||||
return mfree(p);
|
||||
}
|
||||
|
@ -100,6 +100,9 @@ static inline char *umount_and_free(char *p) {
|
||||
}
|
||||
DEFINE_TRIVIAL_CLEANUP_FUNC(char*, umount_and_free);
|
||||
|
||||
char* umount_and_unlink_and_free(char *p);
|
||||
DEFINE_TRIVIAL_CLEANUP_FUNC(char*, umount_and_unlink_and_free);
|
||||
|
||||
int bind_mount_in_namespace(PidRef *target, const char *propagate_path, const char *incoming_path, const char *src, const char *dest, bool read_only, bool make_file_or_directory);
|
||||
int mount_image_in_namespace(PidRef *target, const char *propagate_path, const char *incoming_path, const char *src, const char *dest, bool read_only, bool make_file_or_directory, const MountOptions *options, const ImagePolicy *image_policy);
|
||||
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <linux/magic.h>
|
||||
#include <sched.h>
|
||||
#include <sys/eventfd.h>
|
||||
#include <sys/mount.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "alloc-util.h"
|
||||
@ -11,6 +12,8 @@
|
||||
#include "fd-util.h"
|
||||
#include "fs-util.h"
|
||||
#include "macro.h"
|
||||
#include "missing_mount.h"
|
||||
#include "mount-util.h"
|
||||
#include "mountpoint-util.h"
|
||||
#include "namespace-util.h"
|
||||
#include "path-util.h"
|
||||
@ -47,15 +50,58 @@ TEST(inode_same) {
|
||||
_cleanup_close_ int fd = -EBADF;
|
||||
_cleanup_(unlink_tempfilep) char name[] = "/tmp/test-files_same.XXXXXX";
|
||||
_cleanup_(unlink_tempfilep) char name_alias[] = "/tmp/test-files_same.alias";
|
||||
int r;
|
||||
|
||||
fd = mkostemp_safe(name);
|
||||
assert_se(fd >= 0);
|
||||
assert_se(symlink(name, name_alias) >= 0);
|
||||
|
||||
assert_se(inode_same(name, name, 0));
|
||||
assert_se(inode_same(name, name, AT_SYMLINK_NOFOLLOW));
|
||||
assert_se(inode_same(name, name_alias, 0));
|
||||
assert_se(!inode_same(name, name_alias, AT_SYMLINK_NOFOLLOW));
|
||||
assert_se(inode_same(name, name, 0) > 0);
|
||||
assert_se(inode_same(name, name, AT_SYMLINK_NOFOLLOW) > 0);
|
||||
assert_se(inode_same(name, name_alias, 0) > 0);
|
||||
assert_se(inode_same(name, name_alias, AT_SYMLINK_NOFOLLOW) == 0);
|
||||
|
||||
assert_se(inode_same("/proc", "/proc", 0));
|
||||
assert_se(inode_same("/proc", "/proc", AT_SYMLINK_NOFOLLOW));
|
||||
|
||||
_cleanup_close_ int fd1 = open("/dev/null", O_CLOEXEC|O_RDONLY),
|
||||
fd2 = open("/dev/null", O_CLOEXEC|O_RDONLY);
|
||||
|
||||
assert_se(fd1 >= 0);
|
||||
assert_se(fd2 >= 0);
|
||||
|
||||
assert_se(inode_same_at(fd1, NULL, fd2, NULL, AT_EMPTY_PATH) > 0);
|
||||
assert_se(inode_same_at(fd2, NULL, fd1, NULL, AT_EMPTY_PATH) > 0);
|
||||
assert_se(inode_same_at(fd1, NULL, fd2, NULL, AT_EMPTY_PATH|AT_SYMLINK_NOFOLLOW) > 0);
|
||||
assert_se(inode_same_at(fd2, NULL, fd1, NULL, AT_EMPTY_PATH|AT_SYMLINK_NOFOLLOW) > 0);
|
||||
assert_se(inode_same_at(fd1, NULL, fd1, NULL, AT_EMPTY_PATH) > 0);
|
||||
assert_se(inode_same_at(fd2, NULL, fd2, NULL, AT_EMPTY_PATH|AT_SYMLINK_NOFOLLOW) > 0);
|
||||
|
||||
safe_close(fd2);
|
||||
fd2 = open("/dev/urandom", O_CLOEXEC|O_RDONLY);
|
||||
assert_se(fd2 >= 0);
|
||||
|
||||
assert_se(inode_same_at(fd1, NULL, fd2, NULL, AT_EMPTY_PATH) == 0);
|
||||
assert_se(inode_same_at(fd2, NULL, fd1, NULL, AT_EMPTY_PATH) == 0);
|
||||
assert_se(inode_same_at(fd1, NULL, fd2, NULL, AT_EMPTY_PATH|AT_SYMLINK_NOFOLLOW) == 0);
|
||||
assert_se(inode_same_at(fd2, NULL, fd1, NULL, AT_EMPTY_PATH|AT_SYMLINK_NOFOLLOW) == 0);
|
||||
|
||||
assert_se(inode_same_at(AT_FDCWD, NULL, AT_FDCWD, NULL, AT_EMPTY_PATH) > 0);
|
||||
assert_se(inode_same_at(AT_FDCWD, NULL, fd1, NULL, AT_EMPTY_PATH) == 0);
|
||||
assert_se(inode_same_at(fd1, NULL, AT_FDCWD, NULL, AT_EMPTY_PATH) == 0);
|
||||
|
||||
_cleanup_(umount_and_unlink_and_freep) char *p = NULL;
|
||||
|
||||
assert_se(tempfn_random_child(NULL, NULL, &p) >= 0);
|
||||
assert_se(touch(p) >= 0);
|
||||
|
||||
r = mount_nofollow_verbose(LOG_ERR, name, p, NULL, MS_BIND, NULL);
|
||||
if (r < 0)
|
||||
assert_se(ERRNO_IS_NEG_PRIVILEGE(r));
|
||||
else {
|
||||
assert_se(inode_same(name, p, 0) > 0);
|
||||
assert_se(inode_same(name, p, AT_SYMLINK_NOFOLLOW) > 0);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(is_symlink) {
|
||||
|
Loading…
Reference in New Issue
Block a user