overlayfs fixes for 5.11-rc7
-----BEGIN PGP SIGNATURE----- iHUEABYIAB0WIQSQHSd0lITzzeNWNm3h3BK/laaZPAUCYBuyTQAKCRDh3BK/laaZ PBBhAPwLy3ksQLhY7in4I8aKrSyWRpaCSAeLQUitxnX3eQiQnAD/S1EEIapwradV y4ou1PBRsGnhwNgArXODVCcTgqDJqw8= =GjU4 -----END PGP SIGNATURE----- Merge tag 'ovl-fixes-5.11-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs Pull overlayfs fixes from Miklos Szeredi: - Fix capability conversion and minor overlayfs bugs that are related to the unprivileged overlay mounts introduced in this cycle. - Fix two recent (v5.10) and one old (v4.10) bug. - Clean up security xattr copy-up (related to a SELinux regression). * tag 'ovl-fixes-5.11-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs: ovl: implement volatile-specific fsync error behaviour ovl: skip getxattr of security labels ovl: fix dentry leak in ovl_get_redirect ovl: avoid deadlock on directory ioctl cap: fix conversions on getxattr ovl: perform vfs_getxattr() with mounter creds ovl: add warning on user_ns mismatch
This commit is contained in:
commit
4cb2c00c43
@ -586,6 +586,14 @@ without significant effort.
|
||||
The advantage of mounting with the "volatile" option is that all forms of
|
||||
sync calls to the upper filesystem are omitted.
|
||||
|
||||
In order to avoid giving a false sense of safety, the syncfs (and fsync)
|
||||
semantics of volatile mounts are slightly different than that of the rest of
|
||||
VFS. If any writeback error occurs on the upperdir's filesystem after a
|
||||
volatile mount takes place, all sync functions will return an error. Once this
|
||||
condition is reached, the filesystem will not recover, and every subsequent sync
|
||||
call will return an error, even if the upperdir has not experienced a new error
|
||||
since the last sync call.
|
||||
|
||||
When overlay is mounted with "volatile" option, the directory
|
||||
"$workdir/work/incompat/volatile" is created. During next mount, overlay
|
||||
checks for this directory and refuses to mount if present. This is a strong
|
||||
|
@ -84,6 +84,14 @@ int ovl_copy_xattr(struct super_block *sb, struct dentry *old,
|
||||
|
||||
if (ovl_is_private_xattr(sb, name))
|
||||
continue;
|
||||
|
||||
error = security_inode_copy_up_xattr(name);
|
||||
if (error < 0 && error != -EOPNOTSUPP)
|
||||
break;
|
||||
if (error == 1) {
|
||||
error = 0;
|
||||
continue; /* Discard */
|
||||
}
|
||||
retry:
|
||||
size = vfs_getxattr(old, name, value, value_size);
|
||||
if (size == -ERANGE)
|
||||
@ -107,13 +115,6 @@ retry:
|
||||
goto retry;
|
||||
}
|
||||
|
||||
error = security_inode_copy_up_xattr(name);
|
||||
if (error < 0 && error != -EOPNOTSUPP)
|
||||
break;
|
||||
if (error == 1) {
|
||||
error = 0;
|
||||
continue; /* Discard */
|
||||
}
|
||||
error = vfs_setxattr(new, name, value, size, 0);
|
||||
if (error) {
|
||||
if (error != -EOPNOTSUPP || ovl_must_copy_xattr(name))
|
||||
|
@ -992,8 +992,8 @@ static char *ovl_get_redirect(struct dentry *dentry, bool abs_redirect)
|
||||
|
||||
buflen -= thislen;
|
||||
memcpy(&buf[buflen], name, thislen);
|
||||
tmp = dget_dlock(d->d_parent);
|
||||
spin_unlock(&d->d_lock);
|
||||
tmp = dget_parent(d);
|
||||
|
||||
dput(d);
|
||||
d = tmp;
|
||||
|
@ -398,8 +398,9 @@ static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
|
||||
const struct cred *old_cred;
|
||||
int ret;
|
||||
|
||||
if (!ovl_should_sync(OVL_FS(file_inode(file)->i_sb)))
|
||||
return 0;
|
||||
ret = ovl_sync_status(OVL_FS(file_inode(file)->i_sb));
|
||||
if (ret <= 0)
|
||||
return ret;
|
||||
|
||||
ret = ovl_real_fdget_meta(file, &real, !datasync);
|
||||
if (ret)
|
||||
|
@ -352,7 +352,9 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
|
||||
goto out;
|
||||
|
||||
if (!value && !upperdentry) {
|
||||
old_cred = ovl_override_creds(dentry->d_sb);
|
||||
err = vfs_getxattr(realdentry, name, NULL, 0);
|
||||
revert_creds(old_cred);
|
||||
if (err < 0)
|
||||
goto out_drop_write;
|
||||
}
|
||||
|
@ -324,6 +324,7 @@ int ovl_check_metacopy_xattr(struct ovl_fs *ofs, struct dentry *dentry);
|
||||
bool ovl_is_metacopy_dentry(struct dentry *dentry);
|
||||
char *ovl_get_redirect_xattr(struct ovl_fs *ofs, struct dentry *dentry,
|
||||
int padding);
|
||||
int ovl_sync_status(struct ovl_fs *ofs);
|
||||
|
||||
static inline bool ovl_is_impuredir(struct super_block *sb,
|
||||
struct dentry *dentry)
|
||||
|
@ -81,6 +81,8 @@ struct ovl_fs {
|
||||
atomic_long_t last_ino;
|
||||
/* Whiteout dentry cache */
|
||||
struct dentry *whiteout;
|
||||
/* r/o snapshot of upperdir sb's only taken on volatile mounts */
|
||||
errseq_t errseq;
|
||||
};
|
||||
|
||||
static inline struct vfsmount *ovl_upper_mnt(struct ovl_fs *ofs)
|
||||
|
@ -865,7 +865,7 @@ struct file *ovl_dir_real_file(const struct file *file, bool want_upper)
|
||||
|
||||
struct ovl_dir_file *od = file->private_data;
|
||||
struct dentry *dentry = file->f_path.dentry;
|
||||
struct file *realfile = od->realfile;
|
||||
struct file *old, *realfile = od->realfile;
|
||||
|
||||
if (!OVL_TYPE_UPPER(ovl_path_type(dentry)))
|
||||
return want_upper ? NULL : realfile;
|
||||
@ -874,29 +874,20 @@ struct file *ovl_dir_real_file(const struct file *file, bool want_upper)
|
||||
* Need to check if we started out being a lower dir, but got copied up
|
||||
*/
|
||||
if (!od->is_upper) {
|
||||
struct inode *inode = file_inode(file);
|
||||
|
||||
realfile = READ_ONCE(od->upperfile);
|
||||
if (!realfile) {
|
||||
struct path upperpath;
|
||||
|
||||
ovl_path_upper(dentry, &upperpath);
|
||||
realfile = ovl_dir_open_realfile(file, &upperpath);
|
||||
if (IS_ERR(realfile))
|
||||
return realfile;
|
||||
|
||||
inode_lock(inode);
|
||||
if (!od->upperfile) {
|
||||
if (IS_ERR(realfile)) {
|
||||
inode_unlock(inode);
|
||||
return realfile;
|
||||
}
|
||||
smp_store_release(&od->upperfile, realfile);
|
||||
} else {
|
||||
/* somebody has beaten us to it */
|
||||
if (!IS_ERR(realfile))
|
||||
fput(realfile);
|
||||
realfile = od->upperfile;
|
||||
old = cmpxchg_release(&od->upperfile, NULL, realfile);
|
||||
if (old) {
|
||||
fput(realfile);
|
||||
realfile = old;
|
||||
}
|
||||
inode_unlock(inode);
|
||||
}
|
||||
}
|
||||
|
||||
@ -909,8 +900,9 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
|
||||
struct file *realfile;
|
||||
int err;
|
||||
|
||||
if (!ovl_should_sync(OVL_FS(file->f_path.dentry->d_sb)))
|
||||
return 0;
|
||||
err = ovl_sync_status(OVL_FS(file->f_path.dentry->d_sb));
|
||||
if (err <= 0)
|
||||
return err;
|
||||
|
||||
realfile = ovl_dir_real_file(file, true);
|
||||
err = PTR_ERR_OR_ZERO(realfile);
|
||||
|
@ -264,11 +264,20 @@ static int ovl_sync_fs(struct super_block *sb, int wait)
|
||||
struct super_block *upper_sb;
|
||||
int ret;
|
||||
|
||||
if (!ovl_upper_mnt(ofs))
|
||||
return 0;
|
||||
ret = ovl_sync_status(ofs);
|
||||
/*
|
||||
* We have to always set the err, because the return value isn't
|
||||
* checked in syncfs, and instead indirectly return an error via
|
||||
* the sb's writeback errseq, which VFS inspects after this call.
|
||||
*/
|
||||
if (ret < 0) {
|
||||
errseq_set(&sb->s_wb_err, -EIO);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
if (!ret)
|
||||
return ret;
|
||||
|
||||
if (!ovl_should_sync(ofs))
|
||||
return 0;
|
||||
/*
|
||||
* Not called for sync(2) call or an emergency sync (SB_I_SKIP_SYNC).
|
||||
* All the super blocks will be iterated, including upper_sb.
|
||||
@ -1923,6 +1932,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
|
||||
unsigned int numlower;
|
||||
int err;
|
||||
|
||||
err = -EIO;
|
||||
if (WARN_ON(sb->s_user_ns != current_user_ns()))
|
||||
goto out;
|
||||
|
||||
sb->s_d_op = &ovl_dentry_operations;
|
||||
|
||||
err = -ENOMEM;
|
||||
@ -1989,6 +2002,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
|
||||
sb->s_op = &ovl_super_operations;
|
||||
|
||||
if (ofs->config.upperdir) {
|
||||
struct super_block *upper_sb;
|
||||
|
||||
if (!ofs->config.workdir) {
|
||||
pr_err("missing 'workdir'\n");
|
||||
goto out_err;
|
||||
@ -1998,6 +2013,16 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
|
||||
if (err)
|
||||
goto out_err;
|
||||
|
||||
upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
|
||||
if (!ovl_should_sync(ofs)) {
|
||||
ofs->errseq = errseq_sample(&upper_sb->s_wb_err);
|
||||
if (errseq_check(&upper_sb->s_wb_err, ofs->errseq)) {
|
||||
err = -EIO;
|
||||
pr_err("Cannot mount volatile when upperdir has an unseen error. Sync upperdir fs to clear state.\n");
|
||||
goto out_err;
|
||||
}
|
||||
}
|
||||
|
||||
err = ovl_get_workdir(sb, ofs, &upperpath);
|
||||
if (err)
|
||||
goto out_err;
|
||||
@ -2005,9 +2030,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
|
||||
if (!ofs->workdir)
|
||||
sb->s_flags |= SB_RDONLY;
|
||||
|
||||
sb->s_stack_depth = ovl_upper_mnt(ofs)->mnt_sb->s_stack_depth;
|
||||
sb->s_time_gran = ovl_upper_mnt(ofs)->mnt_sb->s_time_gran;
|
||||
|
||||
sb->s_stack_depth = upper_sb->s_stack_depth;
|
||||
sb->s_time_gran = upper_sb->s_time_gran;
|
||||
}
|
||||
oe = ovl_get_lowerstack(sb, splitlower, numlower, ofs, layers);
|
||||
err = PTR_ERR(oe);
|
||||
|
@ -962,3 +962,30 @@ err_free:
|
||||
kfree(buf);
|
||||
return ERR_PTR(res);
|
||||
}
|
||||
|
||||
/*
|
||||
* ovl_sync_status() - Check fs sync status for volatile mounts
|
||||
*
|
||||
* Returns 1 if this is not a volatile mount and a real sync is required.
|
||||
*
|
||||
* Returns 0 if syncing can be skipped because mount is volatile, and no errors
|
||||
* have occurred on the upperdir since the mount.
|
||||
*
|
||||
* Returns -errno if it is a volatile mount, and the error that occurred since
|
||||
* the last mount. If the error code changes, it'll return the latest error
|
||||
* code.
|
||||
*/
|
||||
|
||||
int ovl_sync_status(struct ovl_fs *ofs)
|
||||
{
|
||||
struct vfsmount *mnt;
|
||||
|
||||
if (ovl_should_sync(ofs))
|
||||
return 1;
|
||||
|
||||
mnt = ovl_upper_mnt(ofs);
|
||||
if (!mnt)
|
||||
return 0;
|
||||
|
||||
return errseq_check(&mnt->mnt_sb->s_wb_err, ofs->errseq);
|
||||
}
|
||||
|
@ -371,10 +371,11 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer,
|
||||
{
|
||||
int size, ret;
|
||||
kuid_t kroot;
|
||||
u32 nsmagic, magic;
|
||||
uid_t root, mappedroot;
|
||||
char *tmpbuf = NULL;
|
||||
struct vfs_cap_data *cap;
|
||||
struct vfs_ns_cap_data *nscap;
|
||||
struct vfs_ns_cap_data *nscap = NULL;
|
||||
struct dentry *dentry;
|
||||
struct user_namespace *fs_ns;
|
||||
|
||||
@ -396,46 +397,61 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer,
|
||||
fs_ns = inode->i_sb->s_user_ns;
|
||||
cap = (struct vfs_cap_data *) tmpbuf;
|
||||
if (is_v2header((size_t) ret, cap)) {
|
||||
/* If this is sizeof(vfs_cap_data) then we're ok with the
|
||||
* on-disk value, so return that. */
|
||||
if (alloc)
|
||||
*buffer = tmpbuf;
|
||||
else
|
||||
kfree(tmpbuf);
|
||||
return ret;
|
||||
} else if (!is_v3header((size_t) ret, cap)) {
|
||||
kfree(tmpbuf);
|
||||
return -EINVAL;
|
||||
root = 0;
|
||||
} else if (is_v3header((size_t) ret, cap)) {
|
||||
nscap = (struct vfs_ns_cap_data *) tmpbuf;
|
||||
root = le32_to_cpu(nscap->rootid);
|
||||
} else {
|
||||
size = -EINVAL;
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
nscap = (struct vfs_ns_cap_data *) tmpbuf;
|
||||
root = le32_to_cpu(nscap->rootid);
|
||||
kroot = make_kuid(fs_ns, root);
|
||||
|
||||
/* If the root kuid maps to a valid uid in current ns, then return
|
||||
* this as a nscap. */
|
||||
mappedroot = from_kuid(current_user_ns(), kroot);
|
||||
if (mappedroot != (uid_t)-1 && mappedroot != (uid_t)0) {
|
||||
size = sizeof(struct vfs_ns_cap_data);
|
||||
if (alloc) {
|
||||
*buffer = tmpbuf;
|
||||
if (!nscap) {
|
||||
/* v2 -> v3 conversion */
|
||||
nscap = kzalloc(size, GFP_ATOMIC);
|
||||
if (!nscap) {
|
||||
size = -ENOMEM;
|
||||
goto out_free;
|
||||
}
|
||||
nsmagic = VFS_CAP_REVISION_3;
|
||||
magic = le32_to_cpu(cap->magic_etc);
|
||||
if (magic & VFS_CAP_FLAGS_EFFECTIVE)
|
||||
nsmagic |= VFS_CAP_FLAGS_EFFECTIVE;
|
||||
memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
|
||||
nscap->magic_etc = cpu_to_le32(nsmagic);
|
||||
} else {
|
||||
/* use allocated v3 buffer */
|
||||
tmpbuf = NULL;
|
||||
}
|
||||
nscap->rootid = cpu_to_le32(mappedroot);
|
||||
} else
|
||||
kfree(tmpbuf);
|
||||
return size;
|
||||
*buffer = nscap;
|
||||
}
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
if (!rootid_owns_currentns(kroot)) {
|
||||
kfree(tmpbuf);
|
||||
return -EOPNOTSUPP;
|
||||
size = -EOVERFLOW;
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
/* This comes from a parent namespace. Return as a v2 capability */
|
||||
size = sizeof(struct vfs_cap_data);
|
||||
if (alloc) {
|
||||
*buffer = kmalloc(size, GFP_ATOMIC);
|
||||
if (*buffer) {
|
||||
struct vfs_cap_data *cap = *buffer;
|
||||
__le32 nsmagic, magic;
|
||||
if (nscap) {
|
||||
/* v3 -> v2 conversion */
|
||||
cap = kzalloc(size, GFP_ATOMIC);
|
||||
if (!cap) {
|
||||
size = -ENOMEM;
|
||||
goto out_free;
|
||||
}
|
||||
magic = VFS_CAP_REVISION_2;
|
||||
nsmagic = le32_to_cpu(nscap->magic_etc);
|
||||
if (nsmagic & VFS_CAP_FLAGS_EFFECTIVE)
|
||||
@ -443,9 +459,12 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer,
|
||||
memcpy(&cap->data, &nscap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
|
||||
cap->magic_etc = cpu_to_le32(magic);
|
||||
} else {
|
||||
size = -ENOMEM;
|
||||
/* use unconverted v2 */
|
||||
tmpbuf = NULL;
|
||||
}
|
||||
*buffer = cap;
|
||||
}
|
||||
out_free:
|
||||
kfree(tmpbuf);
|
||||
return size;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user