2019-06-04 11:11:33 +03:00
/* SPDX-License-Identifier: GPL-2.0-only */
2016-12-16 13:02:56 +03:00
/*
 *
 * Copyright (C) 2011 Novell Inc.
 * Copyright (C) 2016 Red Hat, Inc.
 */
/*
 * Per-mount configuration of an overlayfs instance.
 *
 * The directory members hold the pathnames of the lower, upper and work
 * directories (struct ovl_fs keeps one of these for show_options).
 */
struct ovl_config {
	char *lowerdir;
	char *upperdir;
	char *workdir;
	bool default_permissions;
	bool redirect_dir;
	bool redirect_follow;
	const char *redirect_mode;
	bool index;
	/*
	 * Backs the "uuid=on/off" mount option (false presumably means
	 * "uuid=off").  With "uuid=off" the uuid is replaced with null in
	 * overlayfs file handles, which relaxes the uuid checks of the inodes
	 * index feature.  That is only possible when all work/upper/lower
	 * directories live on a single filesystem whose bare file handles are
	 * unique; with multiple filesystems the mount falls back to "uuid=on".
	 * It lets a copied backing filesystem that was given a new uuid still
	 * be mounted with index enabled.
	 */
	bool uuid;
	bool nfs_export;
	int xino;
	bool metacopy;
	bool userxattr;
	/*
	 * "volatile" mount option: bypass all forced sync calls to the
	 * upperdir filesystem, at the cost of safety.
	 */
	bool ovl_volatile;
};
2018-03-28 20:22:41 +03:00
/*
 * One underlying filesystem (super block) referenced by the overlay's
 * layers.
 */
struct ovl_sb {
	struct super_block *sb;
	dev_t pseudo_dev;
	/* Unusable (conflicting) uuid */
	bool bad_uuid;
	/* Used as a lower layer (but maybe also as upper) */
	bool is_lower;
};
2017-07-24 09:57:54 +03:00
/* Description of a single overlay layer. */
struct ovl_layer {
	struct vfsmount *mnt;
	/*
	 * Trap in ovl inode cache.
	 *
	 * A dummy overlay inode hashed by this layer's root inode.  On mount
	 * the trap entries are used to verify that overlay layers do not
	 * overlap (nor overlap directories in use by other overlay instances
	 * as upperdir/workdir); on lookup they are used to verify that a
	 * layer's root inode has not been moved into another layer after
	 * mount.
	 */
	struct inode *trap;
	struct ovl_sb *fs;
	/* Index of this layer in fs root (upper idx == 0) */
	int idx;
	/* One fsid per unique underlying sb (upper fsid == 0) */
	int fsid;
};
/*
 * Pairs a dentry with an overlay layer (presumably the layer the dentry
 * belongs to -- confirm against the users of lowerstack[]).
 */
struct ovl_path {
	const struct ovl_layer *layer;
	struct dentry *dentry;
};
2016-12-16 13:02:56 +03:00
/* private information held for overlayfs's superblock */
struct ovl_fs {
2019-11-15 15:12:40 +03:00
unsigned int numlayer ;
2020-01-14 22:59:22 +03:00
/* Number of unique fs among layers including upper fs */
unsigned int numfs ;
2020-01-24 11:46:45 +03:00
const struct ovl_layer * layers ;
2020-01-14 22:59:22 +03:00
struct ovl_sb * fs ;
2017-06-21 15:28:33 +03:00
/* workbasedir is the path at workdir= mount option */
struct dentry * workbasedir ;
/* workdir is the 'work' directory under workbasedir */
2016-12-16 13:02:56 +03:00
struct dentry * workdir ;
2017-06-21 15:28:36 +03:00
/* index directory listing overlay inodes by origin file handle */
struct dentry * indexdir ;
2016-12-16 13:02:56 +03:00
long namelen ;
2016-12-16 13:02:56 +03:00
/* pathnames of lower and upper dirs, for show_options */
struct ovl_config config ;
/* creds of process who forced instantiation of super block */
const struct cred * creator_cred ;
2017-01-17 07:34:53 +03:00
bool tmpfile ;
2017-05-17 00:12:40 +03:00
bool noxattr ;
2017-09-29 10:21:21 +03:00
/* Did we take the inuse lock? */
bool upperdir_locked ;
bool workdir_locked ;
2020-04-24 05:55:17 +03:00
bool share_whiteout ;
ovl: detect overlapping layers
Overlapping overlay layers are not supported and can cause unexpected
behavior, but overlayfs does not currently check or warn about these
configurations.
User is not supposed to specify the same directory for upper and
lower dirs or for different lower layers and user is not supposed to
specify directories that are descendants of each other for overlay
layers, but that is exactly what this zysbot repro did:
https://syzkaller.appspot.com/x/repro.syz?x=12c7a94f400000
Moving layer root directories into other layers while overlayfs
is mounted could also result in unexpected behavior.
This commit places "traps" in the overlay inode hash table.
Those traps are dummy overlay inodes that are hashed by the layers
root inodes.
On mount, the hash table trap entries are used to verify that overlay
layers are not overlapping. While at it, we also verify that overlay
layers are not overlapping with directories "in-use" by other overlay
instances as upperdir/workdir.
On lookup, the trap entries are used to verify that overlay layers
root inodes have not been moved into other layers after mount.
Some examples:
$ ./run --ov --samefs -s
...
( mkdir -p base/upper/0/u base/upper/0/w base/lower lower upper mnt
mount -o bind base/lower lower
mount -o bind base/upper upper
mount -t overlay none mnt ...
-o lowerdir=lower,upperdir=upper/0/u,workdir=upper/0/w)
$ umount mnt
$ mount -t overlay none mnt ...
-o lowerdir=base,upperdir=upper/0/u,workdir=upper/0/w
[ 94.434900] overlayfs: overlapping upperdir path
mount: mount overlay on mnt failed: Too many levels of symbolic links
$ mount -t overlay none mnt ...
-o lowerdir=upper/0/u,upperdir=upper/0/u,workdir=upper/0/w
[ 151.350132] overlayfs: conflicting lowerdir path
mount: none is already mounted or mnt busy
$ mount -t overlay none mnt ...
-o lowerdir=lower:lower/a,upperdir=upper/0/u,workdir=upper/0/w
[ 201.205045] overlayfs: overlapping lowerdir path
mount: mount overlay on mnt failed: Too many levels of symbolic links
$ mount -t overlay none mnt ...
-o lowerdir=lower,upperdir=upper/0/u,workdir=upper/0/w
$ mv base/upper/0/ base/lower/
$ find mnt/0
mnt/0
mnt/0/w
find: 'mnt/0/w/work': Too many levels of symbolic links
find: 'mnt/0/u': Too many levels of symbolic links
Reported-by: syzbot+9c69c282adc4edd2b540@syzkaller.appspotmail.com
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
2019-04-18 17:42:08 +03:00
/* Traps in ovl inode cache */
2019-07-12 15:24:34 +03:00
struct inode * workbasedir_trap ;
ovl: detect overlapping layers
Overlapping overlay layers are not supported and can cause unexpected
behavior, but overlayfs does not currently check or warn about these
configurations.
User is not supposed to specify the same directory for upper and
lower dirs or for different lower layers and user is not supposed to
specify directories that are descendants of each other for overlay
layers, but that is exactly what this zysbot repro did:
https://syzkaller.appspot.com/x/repro.syz?x=12c7a94f400000
Moving layer root directories into other layers while overlayfs
is mounted could also result in unexpected behavior.
This commit places "traps" in the overlay inode hash table.
Those traps are dummy overlay inodes that are hashed by the layers
root inodes.
On mount, the hash table trap entries are used to verify that overlay
layers are not overlapping. While at it, we also verify that overlay
layers are not overlapping with directories "in-use" by other overlay
instances as upperdir/workdir.
On lookup, the trap entries are used to verify that overlay layers
root inodes have not been moved into other layers after mount.
Some examples:
$ ./run --ov --samefs -s
...
( mkdir -p base/upper/0/u base/upper/0/w base/lower lower upper mnt
mount -o bind base/lower lower
mount -o bind base/upper upper
mount -t overlay none mnt ...
-o lowerdir=lower,upperdir=upper/0/u,workdir=upper/0/w)
$ umount mnt
$ mount -t overlay none mnt ...
-o lowerdir=base,upperdir=upper/0/u,workdir=upper/0/w
[ 94.434900] overlayfs: overlapping upperdir path
mount: mount overlay on mnt failed: Too many levels of symbolic links
$ mount -t overlay none mnt ...
-o lowerdir=upper/0/u,upperdir=upper/0/u,workdir=upper/0/w
[ 151.350132] overlayfs: conflicting lowerdir path
mount: none is already mounted or mnt busy
$ mount -t overlay none mnt ...
-o lowerdir=lower:lower/a,upperdir=upper/0/u,workdir=upper/0/w
[ 201.205045] overlayfs: overlapping lowerdir path
mount: mount overlay on mnt failed: Too many levels of symbolic links
$ mount -t overlay none mnt ...
-o lowerdir=lower,upperdir=upper/0/u,workdir=upper/0/w
$ mv base/upper/0/ base/lower/
$ find mnt/0
mnt/0
mnt/0/w
find: 'mnt/0/w/work': Too many levels of symbolic links
find: 'mnt/0/u': Too many levels of symbolic links
Reported-by: syzbot+9c69c282adc4edd2b540@syzkaller.appspotmail.com
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
2019-04-18 17:42:08 +03:00
struct inode * workdir_trap ;
struct inode * indexdir_trap ;
2019-11-16 19:14:41 +03:00
/* -1: disabled, 0: same fs, 1..32: number of unused ino bits */
int xino_mode ;
2020-02-21 17:34:43 +03:00
/* For allocation of non-persistent inode numbers */
atomic_long_t last_ino ;
2020-04-24 05:55:17 +03:00
/* Whiteout dentry cache */
struct dentry * whiteout ;
ovl: implement volatile-specific fsync error behaviour
Overlayfs's volatile option allows the user to bypass all forced sync calls
to the upperdir filesystem. This comes at the cost of safety. We can never
ensure that the user's data is intact, but we can make a best effort to
expose whether or not the data is likely to be in a bad state.
The best way to handle this in the time being is that if an overlayfs's
upperdir experiences an error after a volatile mount occurs, that error
will be returned on fsync, fdatasync, sync, and syncfs. This is
contradictory to the traditional behaviour of VFS which fails the call
once, and only raises an error if a subsequent fsync error has occurred,
and been raised by the filesystem.
One awkward aspect of the patch is that we have to manually set the
superblock's errseq_t after the sync_fs callback as opposed to just
returning an error from syncfs. This is because the call chain looks
something like this:
sys_syncfs ->
sync_filesystem ->
__sync_filesystem ->
/* The return value is ignored here
sb->s_op->sync_fs(sb)
_sync_blockdev
/* Where the VFS fetches the error to raise to userspace */
errseq_check_and_advance
Because of this we call errseq_set every time the sync_fs callback occurs.
Due to the nature of this seen / unseen dichotomy, if the upperdir is an
inconsistent state at the initial mount time, overlayfs will refuse to
mount, as overlayfs cannot get a snapshot of the upperdir's errseq that
will increment on error until the user calls syncfs.
Signed-off-by: Sargun Dhillon <sargun@sargun.me>
Suggested-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Fixes: c86243b090bc ("ovl: provide a mount option "volatile"")
Cc: stable@vger.kernel.org
Reviewed-by: Vivek Goyal <vgoyal@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
2021-01-08 03:10:43 +03:00
/* r/o snapshot of upperdir sb's only taken on volatile mounts */
errseq_t errseq ;
2016-12-16 13:02:56 +03:00
} ;
2020-06-04 11:48:19 +03:00
static inline struct vfsmount * ovl_upper_mnt ( struct ovl_fs * ofs )
{
2020-06-04 11:48:19 +03:00
return ofs - > layers [ 0 ] . mnt ;
2020-06-04 11:48:19 +03:00
}
2019-11-16 19:14:41 +03:00
static inline struct ovl_fs * OVL_FS ( struct super_block * sb )
{
return ( struct ovl_fs * ) sb - > s_fs_info ;
}
2020-08-31 21:15:29 +03:00
static inline bool ovl_should_sync ( struct ovl_fs * ofs )
{
return ! ofs - > config . ovl_volatile ;
}
2016-12-16 13:02:56 +03:00
/* private information held for every overlayfs dentry */
struct ovl_entry {
union {
2017-07-04 23:03:18 +03:00
struct {
2018-01-14 20:25:31 +03:00
unsigned long flags ;
2017-07-04 23:03:18 +03:00
} ;
2016-12-16 13:02:56 +03:00
struct rcu_head rcu ;
} ;
unsigned numlower ;
2017-07-24 09:57:54 +03:00
struct ovl_path lowerstack [ ] ;
2016-12-16 13:02:56 +03:00
} ;
/*
 * Declared here, defined elsewhere.  NOTE(review): presumably allocates an
 * ovl_entry with room for numlower lowerstack[] elements -- confirm against
 * the definition.
 */
struct ovl_entry * ovl_alloc_entry ( unsigned int numlower ) ;
2018-01-14 20:25:31 +03:00
static inline struct ovl_entry * OVL_E ( struct dentry * dentry )
{
return ( struct ovl_entry * ) dentry - > d_fsdata ;
}
2017-06-12 09:54:40 +03:00
struct ovl_inode {
2018-05-11 18:49:30 +03:00
union {
struct ovl_dir_cache * cache ; /* directory */
struct inode * lowerdata ; /* regular file */
} ;
2017-07-04 23:03:16 +03:00
const char * redirect ;
2017-07-04 23:03:16 +03:00
u64 version ;
2017-07-04 23:03:16 +03:00
unsigned long flags ;
2017-06-12 09:54:40 +03:00
struct inode vfs_inode ;
2017-07-04 23:03:16 +03:00
struct dentry * __upperdentry ;
2017-07-04 23:03:16 +03:00
struct inode * lower ;
2017-06-21 15:28:51 +03:00
/* synchronize copy up and more */
struct mutex lock ;
2017-06-12 09:54:40 +03:00
} ;
/*
 * Map a VFS inode to the ovl_inode that embeds it (as its vfs_inode
 * member).
 */
static inline struct ovl_inode *OVL_I(struct inode *inode)
{
	struct ovl_inode *oi = container_of(inode, struct ovl_inode, vfs_inode);

	return oi;
}
2017-07-04 23:03:16 +03:00
static inline struct dentry * ovl_upperdentry_dereference ( struct ovl_inode * oi )
{
2017-10-24 13:22:48 +03:00
return READ_ONCE ( oi - > __upperdentry ) ;
2017-07-04 23:03:16 +03:00
}