ovl: make private mounts longterm

Overlayfs is using clone_private_mount() to create internal mounts for underlying layers. These are used for operations requiring a path, such as dentry_open(). Since these private mounts are not in any namespace they are treated as short term, "detached" mounts and mntput() involves taking the global mount_lock, which can result in serious cacheline pingpong. Make these private mounts longterm instead, which trades the penalty on mntput() for a slightly longer shutdown time due to an added RCU grace period when putting these mounts. Introduce a new helper kern_unmount_array() that can take care of multiple longterm mounts with a single RCU grace period. Cc: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
2020-06-04 10:48:19 +02:00 · 2020-06-04 10:48:19 +02:00 · df820f8de4
commit df820f8de4
parent b8e42a651b
4 changed files with 31 additions and 1 deletions
--- a/Documentation/filesystems/porting.rst
+++ b/Documentation/filesystems/porting.rst
@ -858,3 +858,10 @@ be misspelled d_alloc_anon().
 [should've been added in 2016] stale comment in finish_open() nonwithstanding,
 failure exits in ->atomic_open() instances should *NOT* fput() the file,
 no matter what.  Everything is handled by the caller.
 ---
 **mandatory**
 clone_private_mount() returns a longterm mount now, so the proper destructor of
 its result is kern_unmount() or kern_unmount_array().
--- a/fs/namespace.c
+++ b/fs/namespace.c
@ -1879,6 +1879,9 @@ struct vfsmount *clone_private_mount(const struct path *path)
 	if (IS_ERR(new_mnt))
 		return ERR_CAST(new_mnt);
 	/* Longterm mount to be removed by kern_unmount*() */
 	new_mnt->mnt_ns = MNT_NS_INTERNAL;
 	return &new_mnt->mnt;
 }
 EXPORT_SYMBOL_GPL(clone_private_mount);
@ -3804,6 +3807,19 @@ void kern_unmount(struct vfsmount *mnt)
 }
 EXPORT_SYMBOL(kern_unmount);
 /*
  * kern_unmount_array - release several longterm mounts in one go
  * @mnt: array of mounts to release; entries may be NULL
  * @num: number of entries in @mnt
  *
  * Works in three ordered phases: detach every non-NULL mount from its
  * namespace marker (mnt_ns = NULL), wait for a single expedited RCU grace
  * period covering all of them, and only then drop the references.  The
  * ordering is essential; do not merge the loops.
  */
 void kern_unmount_array(struct vfsmount *mnt[], unsigned int num)
 {
 	struct vfsmount **cur;
 	struct vfsmount **end = mnt + num;
 	/* Phase 1: mark each present mount as no longer longterm. */
 	for (cur = mnt; cur != end; cur++) {
 		if (!*cur)
 			continue;
 		real_mount(*cur)->mnt_ns = NULL;
 	}
 	/* Phase 2: one grace period shared by the whole array. */
 	synchronize_rcu_expedited();
 	/* Phase 3: drop references; NULL entries are handed to mntput() as-is. */
 	for (cur = mnt; cur != end; cur++)
 		mntput(*cur);
 }
 EXPORT_SYMBOL(kern_unmount_array);
 bool our_mnt(struct vfsmount *mnt)
 {
 	return check_mnt(real_mount(mnt));
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@ -211,6 +211,7 @@ static void ovl_destroy_inode(struct inode *inode)
 static void ovl_free_fs(struct ovl_fs *ofs)
 {
 	struct vfsmount **mounts;
 	unsigned i;
 	iput(ofs->workbasedir_trap);
@ -224,10 +225,14 @@ static void ovl_free_fs(struct ovl_fs *ofs)
 	dput(ofs->workbasedir);
 	if (ofs->upperdir_locked)
 		ovl_inuse_unlock(ovl_upper_mnt(ofs)->mnt_root);
 	/* Hack!  Reuse ofs->layers as a vfsmount array before freeing it */
 	mounts = (struct vfsmount **) ofs->layers;
 	for (i = 0; i < ofs->numlayer; i++) {
 		iput(ofs->layers[i].trap);
-		mntput(ofs->layers[i].mnt);
+		mounts[i] = ofs->layers[i].mnt;
 	}
 	kern_unmount_array(mounts, ofs->numlayer);
 	kfree(ofs->layers);
 	for (i = 0; i < ofs->numfs; i++)
 		free_anon_bdev(ofs->fs[i].pseudo_dev);
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@ -109,4 +109,6 @@ extern unsigned int sysctl_mount_max;
 extern bool path_is_mountpoint(const struct path *path);
 extern void kern_unmount_array(struct vfsmount *mnt[], unsigned int num);
 #endif /* _LINUX_MOUNT_H */