overlay filesystem
Overlayfs allows one, usually read-write, directory tree to be
overlaid onto another, read-only directory tree. All modifications
go to the upper, writable layer.
This type of mechanism is most often used for live CDs but there's a
wide variety of other uses.
The implementation differs from other "union filesystem"
implementations in that after a file is opened all operations go
directly to the underlying, lower or upper, filesystems. This
simplifies the implementation and allows native performance in these
cases.
The dentry tree is duplicated from the underlying filesystems, this
enables fast cached lookups without adding special support into the
VFS. This uses slightly more memory than union mounts, but dentries
are relatively small.
Currently inodes are duplicated as well, but it is a possible
optimization to share inodes for non-directories.
Opening non directories results in the open forwarded to the
underlying filesystem. This makes the behavior very similar to union
mounts (with the same limitations vs. fchmod/fchown on O_RDONLY file
descriptors).
Usage:
mount -t overlayfs overlayfs -olowerdir=/lower,upperdir=/upper/upper,workdir=/upper/work /overlay
The following contributions have been folded into this patch:
Neil Brown <neilb@suse.de>:
- minimal remount support
- use correct seek function for directories
- initialise is_real before use
- rename ovl_fill_cache to ovl_dir_read
Felix Fietkau <nbd@openwrt.org>:
- fix a deadlock in ovl_dir_read_merged
- fix a deadlock in ovl_remove_whiteouts
Erez Zadok <ezk@fsl.cs.sunysb.edu>
- fix cleanup after WARN_ON
Sedat Dilek <sedat.dilek@googlemail.com>
- fix up permission to conform to new API
Robin Dong <hao.bigrat@gmail.com>
- fix possible leak in ovl_new_inode
- create new inode in ovl_link
Andy Whitcroft <apw@canonical.com>
- switch to __inode_permission()
- copy up i_uid/i_gid from the underlying inode
AV:
- ovl_copy_up_locked() - dput(ERR_PTR(...)) on two failure exits
- ovl_clear_empty() - one failure exit forgetting to do unlock_rename(),
lack of check for udir being the parent of upper, dropping and regaining
the lock on udir (which would require _another_ check for parent being
right).
- bogus d_drop() in copyup and rename [fix from your mail]
- copyup/remove and copyup/rename races [fix from your mail]
- ovl_dir_fsync() leaving ERR_PTR() in ->realfile
- ovl_entry_free() is pointless - it's just a kfree_rcu()
- fold ovl_do_lookup() into ovl_lookup()
- manually assigning ->d_op is wrong. Just use ->s_d_op.
[patches picked from Miklos]:
* copyup/remove and copyup/rename races
* bogus d_drop() in copyup and rename
Also thanks to the following people for testing and reporting bugs:
Jordi Pujol <jordipujolp@gmail.com>
Andy Whitcroft <apw@canonical.com>
Michal Suchanek <hramrach@centrum.cz>
Felix Fietkau <nbd@openwrt.org>
Erez Zadok <ezk@fsl.cs.sunysb.edu>
Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
2014-10-24 00:14:38 +02:00
/*
*
* Copyright ( C ) 2011 Novell Inc .
*
* This program is free software ; you can redistribute it and / or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation .
*/
# include <linux/fs.h>
# include <linux/namei.h>
# include <linux/xattr.h>
# include <linux/security.h>
# include <linux/cred.h>
# include "overlayfs.h"
void ovl_cleanup ( struct inode * wdir , struct dentry * wdentry )
{
int err ;
dget ( wdentry ) ;
if ( S_ISDIR ( wdentry - > d_inode - > i_mode ) )
err = ovl_do_rmdir ( wdir , wdentry ) ;
else
err = ovl_do_unlink ( wdir , wdentry ) ;
dput ( wdentry ) ;
if ( err ) {
pr_err ( " overlayfs: cleanup of '%pd2' failed (%i) \n " ,
wdentry , err ) ;
}
}
struct dentry * ovl_lookup_temp ( struct dentry * workdir , struct dentry * dentry )
{
struct dentry * temp ;
char name [ 20 ] ;
snprintf ( name , sizeof ( name ) , " #%lx " , ( unsigned long ) dentry ) ;
temp = lookup_one_len ( name , workdir , strlen ( name ) ) ;
if ( ! IS_ERR ( temp ) & & temp - > d_inode ) {
pr_err ( " overlayfs: workdir/%s already exists \n " , name ) ;
dput ( temp ) ;
temp = ERR_PTR ( - EIO ) ;
}
return temp ;
}
/* caller holds i_mutex on workdir */
static struct dentry * ovl_whiteout ( struct dentry * workdir ,
struct dentry * dentry )
{
int err ;
struct dentry * whiteout ;
struct inode * wdir = workdir - > d_inode ;
whiteout = ovl_lookup_temp ( workdir , dentry ) ;
if ( IS_ERR ( whiteout ) )
return whiteout ;
err = ovl_do_whiteout ( wdir , whiteout ) ;
if ( err ) {
dput ( whiteout ) ;
whiteout = ERR_PTR ( err ) ;
}
return whiteout ;
}
/*
 * Create the object described by the arguments at @newdentry inside the
 * directory inode @dir.
 *
 * If @hardlink is set, a link to it is created and @stat is not looked
 * at.  Otherwise the file type bits of @stat->mode select between
 * create, mkdir, mknod (using @stat->rdev) and symlink (using @link).
 * @debug is passed through to the ovl_do_*() helpers.
 *
 * Returns 0 on success, -ESTALE if @newdentry is already positive,
 * -EPERM for an unrecognised file type, -ENOENT if the operation
 * reported success but left @newdentry without an inode, or the error
 * from the helper that was called.
 */
int ovl_create_real(struct inode *dir, struct dentry *newdentry,
		    struct kstat *stat, const char *link,
		    struct dentry *hardlink, bool debug)
{
	int err;

	if (newdentry->d_inode)
		return -ESTALE;

	if (hardlink) {
		err = ovl_do_link(hardlink, dir, newdentry, debug);
	} else {
		switch (stat->mode & S_IFMT) {
		case S_IFREG:
			err = ovl_do_create(dir, newdentry, stat->mode, debug);
			break;

		case S_IFDIR:
			err = ovl_do_mkdir(dir, newdentry, stat->mode, debug);
			break;

		case S_IFCHR:
		case S_IFBLK:
		case S_IFIFO:
		case S_IFSOCK:
			err = ovl_do_mknod(dir, newdentry,
					   stat->mode, stat->rdev, debug);
			break;

		case S_IFLNK:
			err = ovl_do_symlink(dir, newdentry, link, debug);
			break;

		default:
			err = -EPERM;
			break;
		}
	}
	if (err)
		return err;

	/*
	 * It is unclear whether a non-instantiated dentry is legal after
	 * a successful create.  The VFS does not seem to care, so check
	 * and warn here.
	 */
	if (WARN_ON(!newdentry->d_inode))
		return -ENOENT;

	return 0;
}
/*
 * Mark @upperdentry as opaque by setting the ovl_opaque_xattr attribute
 * to the single byte 'y'.
 *
 * Returns the result of ovl_do_setxattr().
 */
static int ovl_set_opaque(struct dentry *upperdentry)
{
	/* Size is 1, so the literal must begin with 'y', not with padding. */
	return ovl_do_setxattr(upperdentry, ovl_opaque_xattr, "y", 1, 0);
}
/*
 * Remove the ovl_opaque_xattr attribute from @upperdentry.
 *
 * Best effort: a failure is logged as a warning and otherwise ignored.
 */
static void ovl_remove_opaque(struct dentry *upperdentry)
{
	int err;

	err = ovl_do_removexattr(upperdentry, ovl_opaque_xattr);
	if (err) {
		/*
		 * Let printk format the dentry name via %pd (as ovl_cleanup()
		 * does with %pd2) instead of reading d_name.name directly.
		 */
		pr_warn("overlayfs: failed to remove opaque from '%pd' (%i)\n",
			upperdentry, err);
	}
}
static int ovl_dir_getattr ( struct vfsmount * mnt , struct dentry * dentry ,
struct kstat * stat )
{
int err ;
enum ovl_path_type type ;
struct path realpath ;
type = ovl_path_real ( dentry , & realpath ) ;
err = vfs_getattr ( & realpath , stat ) ;
if ( err )
return err ;
stat - > dev = dentry - > d_sb - > s_dev ;
stat - > ino = dentry - > d_inode - > i_ino ;
/*
* It ' s probably not worth it to count subdirs to get the
* correct link count . nlink = 1 seems to pacify ' find ' and
* other utilities .
*/
if ( type = = OVL_PATH_MERGE )
stat - > nlink = 1 ;
return 0 ;
}
static int ovl_create_upper ( struct dentry * dentry , struct inode * inode ,
struct kstat * stat , const char * link ,
struct dentry * hardlink )
{
struct dentry * upperdir = ovl_dentry_upper ( dentry - > d_parent ) ;
struct inode * udir = upperdir - > d_inode ;
struct dentry * newdentry ;
int err ;
mutex_lock_nested ( & udir - > i_mutex , I_MUTEX_PARENT ) ;
newdentry = lookup_one_len ( dentry - > d_name . name , upperdir ,
dentry - > d_name . len ) ;
err = PTR_ERR ( newdentry ) ;
if ( IS_ERR ( newdentry ) )
goto out_unlock ;
err = ovl_create_real ( udir , newdentry , stat , link , hardlink , false ) ;
if ( err )
goto out_dput ;
ovl_dentry_version_inc ( dentry - > d_parent ) ;
ovl_dentry_update ( dentry , newdentry ) ;
ovl_copyattr ( newdentry - > d_inode , inode ) ;
d_instantiate ( dentry , inode ) ;
newdentry = NULL ;
out_dput :
dput ( newdentry ) ;
out_unlock :
mutex_unlock ( & udir - > i_mutex ) ;
return err ;
}
static int ovl_lock_rename_workdir ( struct dentry * workdir ,
struct dentry * upperdir )
{
/* Workdir should not be the same as upperdir */
if ( workdir = = upperdir )
goto err ;
/* Workdir should not be subdir of upperdir and vice versa */
if ( lock_rename ( workdir , upperdir ) ! = NULL )
goto err_unlock ;
return 0 ;
err_unlock :
unlock_rename ( workdir , upperdir ) ;
err :
pr_err ( " overlayfs: failed to lock workdir+upperdir \n " ) ;
return - EIO ;
}
static struct dentry * ovl_clear_empty ( struct dentry * dentry ,
struct list_head * list )
{
struct dentry * workdir = ovl_workdir ( dentry ) ;
struct inode * wdir = workdir - > d_inode ;
struct dentry * upperdir = ovl_dentry_upper ( dentry - > d_parent ) ;
struct inode * udir = upperdir - > d_inode ;
struct path upperpath ;
struct dentry * upper ;
struct dentry * opaquedir ;
struct kstat stat ;
int err ;
err = ovl_lock_rename_workdir ( workdir , upperdir ) ;
if ( err )
goto out ;
ovl_path_upper ( dentry , & upperpath ) ;
err = vfs_getattr ( & upperpath , & stat ) ;
if ( err )
goto out_unlock ;
err = - ESTALE ;
if ( ! S_ISDIR ( stat . mode ) )
goto out_unlock ;
upper = upperpath . dentry ;
if ( upper - > d_parent - > d_inode ! = udir )
goto out_unlock ;
opaquedir = ovl_lookup_temp ( workdir , dentry ) ;
err = PTR_ERR ( opaquedir ) ;
if ( IS_ERR ( opaquedir ) )
goto out_unlock ;
err = ovl_create_real ( wdir , opaquedir , & stat , NULL , NULL , true ) ;
if ( err )
goto out_dput ;
err = ovl_copy_xattr ( upper , opaquedir ) ;
if ( err )
goto out_cleanup ;
err = ovl_set_opaque ( opaquedir ) ;
if ( err )
goto out_cleanup ;
mutex_lock ( & opaquedir - > d_inode - > i_mutex ) ;
err = ovl_set_attr ( opaquedir , & stat ) ;
mutex_unlock ( & opaquedir - > d_inode - > i_mutex ) ;
if ( err )
goto out_cleanup ;
err = ovl_do_rename ( wdir , opaquedir , udir , upper , RENAME_EXCHANGE ) ;
if ( err )
goto out_cleanup ;
ovl_cleanup_whiteouts ( upper , list ) ;
ovl_cleanup ( wdir , upper ) ;
unlock_rename ( workdir , upperdir ) ;
/* dentry's upper doesn't match now, get rid of it */
d_drop ( dentry ) ;
return opaquedir ;
out_cleanup :
ovl_cleanup ( wdir , opaquedir ) ;
out_dput :
dput ( opaquedir ) ;
out_unlock :
unlock_rename ( workdir , upperdir ) ;
out :
return ERR_PTR ( err ) ;
}
2014-11-20 16:39:59 +01:00
static struct dentry * ovl_check_empty_and_clear ( struct dentry * dentry )
overlay filesystem
Overlayfs allows one, usually read-write, directory tree to be
overlaid onto another, read-only directory tree. All modifications
go to the upper, writable layer.
This type of mechanism is most often used for live CDs but there's a
wide variety of other uses.
The implementation differs from other "union filesystem"
implementations in that after a file is opened all operations go
directly to the underlying, lower or upper, filesystems. This
simplifies the implementation and allows native performance in these
cases.
The dentry tree is duplicated from the underlying filesystems, this
enables fast cached lookups without adding special support into the
VFS. This uses slightly more memory than union mounts, but dentries
are relatively small.
Currently inodes are duplicated as well, but it is a possible
optimization to share inodes for non-directories.
Opening non directories results in the open forwarded to the
underlying filesystem. This makes the behavior very similar to union
mounts (with the same limitations vs. fchmod/fchown on O_RDONLY file
descriptors).
Usage:
mount -t overlayfs overlayfs -olowerdir=/lower,upperdir=/upper/upper,workdir=/upper/work /overlay
The following cotributions have been folded into this patch:
Neil Brown <neilb@suse.de>:
- minimal remount support
- use correct seek function for directories
- initialise is_real before use
- rename ovl_fill_cache to ovl_dir_read
Felix Fietkau <nbd@openwrt.org>:
- fix a deadlock in ovl_dir_read_merged
- fix a deadlock in ovl_remove_whiteouts
Erez Zadok <ezk@fsl.cs.sunysb.edu>
- fix cleanup after WARN_ON
Sedat Dilek <sedat.dilek@googlemail.com>
- fix up permission to confirm to new API
Robin Dong <hao.bigrat@gmail.com>
- fix possible leak in ovl_new_inode
- create new inode in ovl_link
Andy Whitcroft <apw@canonical.com>
- switch to __inode_permission()
- copy up i_uid/i_gid from the underlying inode
AV:
- ovl_copy_up_locked() - dput(ERR_PTR(...)) on two failure exits
- ovl_clear_empty() - one failure exit forgetting to do unlock_rename(),
lack of check for udir being the parent of upper, dropping and regaining
the lock on udir (which would require _another_ check for parent being
right).
- bogus d_drop() in copyup and rename [fix from your mail]
- copyup/remove and copyup/rename races [fix from your mail]
- ovl_dir_fsync() leaving ERR_PTR() in ->realfile
- ovl_entry_free() is pointless - it's just a kfree_rcu()
- fold ovl_do_lookup() into ovl_lookup()
- manually assigning ->d_op is wrong. Just use ->s_d_op.
[patches picked from Miklos]:
* copyup/remove and copyup/rename races
* bogus d_drop() in copyup and rename
Also thanks to the following people for testing and reporting bugs:
Jordi Pujol <jordipujolp@gmail.com>
Andy Whitcroft <apw@canonical.com>
Michal Suchanek <hramrach@centrum.cz>
Felix Fietkau <nbd@openwrt.org>
Erez Zadok <ezk@fsl.cs.sunysb.edu>
Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
2014-10-24 00:14:38 +02:00
{
int err ;
struct dentry * ret = NULL ;
LIST_HEAD ( list ) ;
err = ovl_check_empty_dir ( dentry , & list ) ;
if ( err )
ret = ERR_PTR ( err ) ;
2014-11-20 16:39:59 +01:00
else {
/*
* If no upperdentry then skip clearing whiteouts .
*
* Can race with copy - up , since we don ' t hold the upperdir
* mutex . Doesn ' t matter , since copy - up can ' t create a
* non - empty directory from an empty one .
*/
if ( ovl_dentry_upper ( dentry ) )
ret = ovl_clear_empty ( dentry , & list ) ;
}
overlay filesystem
Overlayfs allows one, usually read-write, directory tree to be
overlaid onto another, read-only directory tree. All modifications
go to the upper, writable layer.
This type of mechanism is most often used for live CDs but there's a
wide variety of other uses.
The implementation differs from other "union filesystem"
implementations in that after a file is opened all operations go
directly to the underlying, lower or upper, filesystems. This
simplifies the implementation and allows native performance in these
cases.
The dentry tree is duplicated from the underlying filesystems, this
enables fast cached lookups without adding special support into the
VFS. This uses slightly more memory than union mounts, but dentries
are relatively small.
Currently inodes are duplicated as well, but it is a possible
optimization to share inodes for non-directories.
Opening non directories results in the open forwarded to the
underlying filesystem. This makes the behavior very similar to union
mounts (with the same limitations vs. fchmod/fchown on O_RDONLY file
descriptors).
Usage:
mount -t overlayfs overlayfs -olowerdir=/lower,upperdir=/upper/upper,workdir=/upper/work /overlay
The following cotributions have been folded into this patch:
Neil Brown <neilb@suse.de>:
- minimal remount support
- use correct seek function for directories
- initialise is_real before use
- rename ovl_fill_cache to ovl_dir_read
Felix Fietkau <nbd@openwrt.org>:
- fix a deadlock in ovl_dir_read_merged
- fix a deadlock in ovl_remove_whiteouts
Erez Zadok <ezk@fsl.cs.sunysb.edu>
- fix cleanup after WARN_ON
Sedat Dilek <sedat.dilek@googlemail.com>
- fix up permission to confirm to new API
Robin Dong <hao.bigrat@gmail.com>
- fix possible leak in ovl_new_inode
- create new inode in ovl_link
Andy Whitcroft <apw@canonical.com>
- switch to __inode_permission()
- copy up i_uid/i_gid from the underlying inode
AV:
- ovl_copy_up_locked() - dput(ERR_PTR(...)) on two failure exits
- ovl_clear_empty() - one failure exit forgetting to do unlock_rename(),
lack of check for udir being the parent of upper, dropping and regaining
the lock on udir (which would require _another_ check for parent being
right).
- bogus d_drop() in copyup and rename [fix from your mail]
- copyup/remove and copyup/rename races [fix from your mail]
- ovl_dir_fsync() leaving ERR_PTR() in ->realfile
- ovl_entry_free() is pointless - it's just a kfree_rcu()
- fold ovl_do_lookup() into ovl_lookup()
- manually assigning ->d_op is wrong. Just use ->s_d_op.
[patches picked from Miklos]:
* copyup/remove and copyup/rename races
* bogus d_drop() in copyup and rename
Also thanks to the following people for testing and reporting bugs:
Jordi Pujol <jordipujolp@gmail.com>
Andy Whitcroft <apw@canonical.com>
Michal Suchanek <hramrach@centrum.cz>
Felix Fietkau <nbd@openwrt.org>
Erez Zadok <ezk@fsl.cs.sunysb.edu>
Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
2014-10-24 00:14:38 +02:00
ovl_cache_free ( & list ) ;
return ret ;
}
static int ovl_create_over_whiteout ( struct dentry * dentry , struct inode * inode ,
struct kstat * stat , const char * link ,
struct dentry * hardlink )
{
struct dentry * workdir = ovl_workdir ( dentry ) ;
struct inode * wdir = workdir - > d_inode ;
struct dentry * upperdir = ovl_dentry_upper ( dentry - > d_parent ) ;
struct inode * udir = upperdir - > d_inode ;
struct dentry * upper ;
struct dentry * newdentry ;
int err ;
err = ovl_lock_rename_workdir ( workdir , upperdir ) ;
if ( err )
goto out ;
newdentry = ovl_lookup_temp ( workdir , dentry ) ;
err = PTR_ERR ( newdentry ) ;
if ( IS_ERR ( newdentry ) )
goto out_unlock ;
upper = lookup_one_len ( dentry - > d_name . name , upperdir ,
dentry - > d_name . len ) ;
err = PTR_ERR ( upper ) ;
if ( IS_ERR ( upper ) )
goto out_dput ;
err = ovl_create_real ( wdir , newdentry , stat , link , hardlink , true ) ;
if ( err )
goto out_dput2 ;
if ( S_ISDIR ( stat - > mode ) ) {
err = ovl_set_opaque ( newdentry ) ;
if ( err )
goto out_cleanup ;
err = ovl_do_rename ( wdir , newdentry , udir , upper ,
RENAME_EXCHANGE ) ;
if ( err )
goto out_cleanup ;
ovl_cleanup ( wdir , upper ) ;
} else {
err = ovl_do_rename ( wdir , newdentry , udir , upper , 0 ) ;
if ( err )
goto out_cleanup ;
}
ovl_dentry_version_inc ( dentry - > d_parent ) ;
ovl_dentry_update ( dentry , newdentry ) ;
ovl_copyattr ( newdentry - > d_inode , inode ) ;
d_instantiate ( dentry , inode ) ;
newdentry = NULL ;
out_dput2 :
dput ( upper ) ;
out_dput :
dput ( newdentry ) ;
out_unlock :
unlock_rename ( workdir , upperdir ) ;
out :
return err ;
out_cleanup :
ovl_cleanup ( wdir , newdentry ) ;
goto out_dput2 ;
}
/*
 * Common back end for object creation and hard linking.
 *
 * Allocates a new overlay inode, copies up the parent of @dentry and
 * then creates the object either directly in the upper directory (when
 * @dentry is not opaque) or over an existing entry, the latter with
 * temporarily raised capabilities.
 *
 * Returns 0 on success; -ENOMEM if the inode or the credentials cannot
 * be allocated; otherwise the error from copy-up or from the create
 * helper.  The new inode is released on every failure path.
 */
static int ovl_create_or_link(struct dentry *dentry, int mode, dev_t rdev,
			      const char *link, struct dentry *hardlink)
{
	struct kstat stat = {
		.mode = mode,
		.rdev = rdev,
	};
	struct inode *inode;
	int err;

	inode = ovl_new_inode(dentry->d_sb, mode, dentry->d_fsdata);
	if (!inode) {
		err = -ENOMEM;
		goto out;
	}

	err = ovl_copy_up(dentry->d_parent);
	if (err)
		goto out_iput;

	if (ovl_dentry_is_opaque(dentry)) {
		const struct cred *saved_cred;
		struct cred *raised_cred;

		raised_cred = prepare_creds();
		if (!raised_cred) {
			err = -ENOMEM;
			goto out_iput;
		}

		/*
		 * CAP_SYS_ADMIN for setting opaque xattr
		 * CAP_DAC_OVERRIDE for create in workdir, rename
		 * CAP_FOWNER for removing whiteout from sticky dir
		 */
		cap_raise(raised_cred->cap_effective, CAP_SYS_ADMIN);
		cap_raise(raised_cred->cap_effective, CAP_DAC_OVERRIDE);
		cap_raise(raised_cred->cap_effective, CAP_FOWNER);

		saved_cred = override_creds(raised_cred);
		err = ovl_create_over_whiteout(dentry, inode, &stat, link,
					       hardlink);
		revert_creds(saved_cred);
		put_cred(raised_cred);
	} else {
		err = ovl_create_upper(dentry, inode, &stat, link, hardlink);
	}

	/* On success the inode must not be released by the iput() below. */
	if (!err)
		inode = NULL;
out_iput:
	iput(inode);
out:
	return err;
}
/*
 * Create a new (non-hardlink) object at @dentry, bracketed by
 * ovl_want_write() / ovl_drop_write().
 *
 * Returns the error from ovl_want_write() if that fails, otherwise the
 * result of ovl_create_or_link().
 */
static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
			     const char *link)
{
	int err = ovl_want_write(dentry);

	if (err)
		return err;

	err = ovl_create_or_link(dentry, mode, rdev, link, NULL);
	ovl_drop_write(dentry);

	return err;
}
/*
 * Create a regular file at @dentry.  Only the low 07777 bits of @mode
 * are kept; the file type is forced to S_IFREG.  @dir and @excl are not
 * used.
 */
static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode,
		      bool excl)
{
	int file_mode = (mode & 07777) | S_IFREG;

	return ovl_create_object(dentry, file_mode, 0, NULL);
}
/*
 * Create a directory at @dentry.  Only the low 07777 bits of @mode are
 * kept; the file type is forced to S_IFDIR.  @dir is not used.
 */
static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	int dir_mode = (mode & 07777) | S_IFDIR;

	return ovl_create_object(dentry, dir_mode, 0, NULL);
}
/*
 * Create a special file at @dentry with the given @mode and @rdev.
 *
 * Returns -EPERM for a character device whose @rdev is WHITEOUT_DEV,
 * otherwise the result of ovl_create_object().  @dir is not used.
 */
static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
		     dev_t rdev)
{
	const bool is_whiteout = S_ISCHR(mode) && rdev == WHITEOUT_DEV;

	/* Don't allow creation of "whiteout" on overlay */
	if (is_whiteout)
		return -EPERM;

	return ovl_create_object(dentry, mode, rdev, NULL);
}
/*
 * Create a symbolic link at @dentry pointing to @link.  The mode passed
 * down is S_IFLNK with no permission bits.  @dir is not used.
 */
static int ovl_symlink(struct inode *dir, struct dentry *dentry,
		       const char *link)
{
	const int link_mode = S_IFLNK;

	return ovl_create_object(dentry, link_mode, 0, link);
}
static int ovl_link ( struct dentry * old , struct inode * newdir ,
struct dentry * new )
{
int err ;
struct dentry * upper ;
err = ovl_want_write ( old ) ;
if ( err )
goto out ;
err = ovl_copy_up ( old ) ;
if ( err )
goto out_drop_write ;
upper = ovl_dentry_upper ( old ) ;
err = ovl_create_or_link ( new , upper - > d_inode - > i_mode , 0 , NULL , upper ) ;
out_drop_write :
ovl_drop_write ( old ) ;
out :
return err ;
}
2014-11-20 16:39:59 +01:00
/*
 * Remove @dentry by putting a whiteout in its place in the upper
 * directory of its parent.
 *
 * For a directory (@is_dir), ovl_check_empty_and_clear() is called
 * first; it may return a replacement opaque directory, which is then
 * used as the upper dentry.  A whiteout is created in the work directory
 * and renamed over the upper entry: a plain rename for non-directories
 * and for the case with no upper dentry, an exchange followed by
 * removal of the swapped-out directory otherwise.
 *
 * Returns 0 on success or a negative error.  Once the whiteout has been
 * created, @dentry is unhashed on both the success and the failure path.
 */
static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
{
	struct dentry *workdir = ovl_workdir(dentry);
	struct inode *wdir = workdir->d_inode;
	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
	struct inode *udir = upperdir->d_inode;
	struct dentry *whiteout;
	struct dentry *upper;
	struct dentry *opaquedir = NULL;
	int err;

	if (is_dir) {
		opaquedir = ovl_check_empty_and_clear(dentry);
		err = PTR_ERR(opaquedir);
		if (IS_ERR(opaquedir))
			goto out;
	}

	err = ovl_lock_rename_workdir(workdir, upperdir);
	if (err)
		goto out_dput;

	whiteout = ovl_whiteout(workdir, dentry);
	err = PTR_ERR(whiteout);
	if (IS_ERR(whiteout))
		goto out_unlock;

	upper = ovl_dentry_upper(dentry);
	if (!upper) {
		/* No upper dentry: look the name up in the upper directory. */
		upper = lookup_one_len(dentry->d_name.name, upperdir,
				       dentry->d_name.len);
		err = PTR_ERR(upper);
		if (IS_ERR(upper))
			goto kill_whiteout;

		err = ovl_do_rename(wdir, whiteout, udir, upper, 0);
		dput(upper);
		if (err)
			goto kill_whiteout;
	} else {
		int flags = 0;

		/* Prefer the replacement directory if one was created. */
		if (opaquedir)
			upper = opaquedir;

		/* The upper dentry must still be a child of upperdir. */
		err = -ESTALE;
		if (upper->d_parent != upperdir)
			goto kill_whiteout;

		if (is_dir)
			flags |= RENAME_EXCHANGE;

		err = ovl_do_rename(wdir, whiteout, udir, upper, flags);
		if (err)
			goto kill_whiteout;

		/* After the exchange the directory sits in the work dir. */
		if (is_dir)
			ovl_cleanup(wdir, upper);
	}
	ovl_dentry_version_inc(dentry->d_parent);
out_d_drop:
	d_drop(dentry);
	dput(whiteout);
out_unlock:
	unlock_rename(workdir, upperdir);
out_dput:
	dput(opaquedir);
out:
	return err;

kill_whiteout:
	ovl_cleanup(wdir, whiteout);
	goto out_d_drop;
}
/*
 * Remove the upper-layer object backing @dentry.
 *
 * @dentry: overlay dentry being removed
 * @is_dir: true to remove with vfs_rmdir(), false to use vfs_unlink()
 *
 * The upper parent's i_mutex is held (I_MUTEX_PARENT class) for the whole
 * operation.  If the upper dentry is no longer a child of the upper parent
 * nothing is removed and -ESTALE is returned; otherwise the result of the
 * VFS removal call is returned.  The overlay dentry is unhashed in every
 * case.
 */
static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
{
	struct dentry *parent_upper = ovl_dentry_upper(dentry->d_parent);
	struct inode *parent_inode = parent_upper->d_inode;
	struct dentry *victim = ovl_dentry_upper(dentry);
	int err = -ESTALE;

	mutex_lock_nested(&parent_inode->i_mutex, I_MUTEX_PARENT);

	/* The upper dentry was moved away from under us: report it as stale */
	if (victim->d_parent != parent_upper)
		goto out_d_drop;

	/* Don't let d_delete() think it can reset d_inode */
	dget(victim);
	err = is_dir ? vfs_rmdir(parent_inode, victim)
		     : vfs_unlink(parent_inode, victim, NULL);
	dput(victim);
	ovl_dentry_version_inc(dentry->d_parent);

out_d_drop:
	/*
	 * Keeping this dentry hashed would mean having to release
	 * upperpath/lowerpath, which could only be done if we are the
	 * sole user of this dentry.  Too tricky...  Just unhash for
	 * now.
	 */
	d_drop(dentry);
	mutex_unlock(&parent_inode->i_mutex);

	return err;
}
static inline int ovl_check_sticky ( struct dentry * dentry )
{
struct inode * dir = ovl_dentry_real ( dentry - > d_parent ) - > d_inode ;
struct inode * inode = ovl_dentry_real ( dentry ) - > d_inode ;
if ( check_sticky ( dir , inode ) )
return - EPERM ;
return 0 ;
}
/*
 * Common implementation of unlink and rmdir on an overlay dentry.
 *
 * @dentry: overlay dentry to remove
 * @is_dir: true when called for rmdir, false for unlink
 *
 * After the sticky check, write access is acquired and the parent is copied
 * up.  A pure-upper entry is removed directly via ovl_remove_upper(); any
 * other entry goes through ovl_remove_and_whiteout() under temporarily
 * raised capabilities.
 *
 * Returns 0 on success or a negative errno (-ENOMEM if the override
 * credentials cannot be allocated).
 */
static int ovl_do_remove(struct dentry *dentry, bool is_dir)
{
	const struct cred *saved_cred;
	struct cred *elevated;
	int err;

	err = ovl_check_sticky(dentry);
	if (err)
		return err;

	err = ovl_want_write(dentry);
	if (err)
		return err;

	err = ovl_copy_up(dentry->d_parent);
	if (err)
		goto out_drop_write;

	if (ovl_path_type(dentry) == OVL_PATH_PURE_UPPER) {
		err = ovl_remove_upper(dentry, is_dir);
		goto out_drop_write;
	}

	elevated = prepare_creds();
	if (!elevated) {
		err = -ENOMEM;
		goto out_drop_write;
	}

	/*
	 * CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir
	 * CAP_DAC_OVERRIDE for create in workdir, rename
	 * CAP_FOWNER for removing whiteout from sticky dir
	 * CAP_FSETID for chmod of opaque dir
	 * CAP_CHOWN for chown of opaque dir
	 */
	cap_raise(elevated->cap_effective, CAP_SYS_ADMIN);
	cap_raise(elevated->cap_effective, CAP_DAC_OVERRIDE);
	cap_raise(elevated->cap_effective, CAP_FOWNER);
	cap_raise(elevated->cap_effective, CAP_FSETID);
	cap_raise(elevated->cap_effective, CAP_CHOWN);

	saved_cred = override_creds(elevated);
	err = ovl_remove_and_whiteout(dentry, is_dir);
	revert_creds(saved_cred);
	put_cred(elevated);

out_drop_write:
	ovl_drop_write(dentry);
	return err;
}
/*
 * ->unlink() handler: remove a non-directory entry.
 * @dir is not used; the work is delegated to ovl_do_remove().
 */
static int ovl_unlink(struct inode *dir, struct dentry *dentry)
{
	return ovl_do_remove(dentry, /* is_dir */ false);
}
/*
 * ->rmdir() handler: remove a directory entry.
 * @dir is not used; the work is delegated to ovl_do_remove().
 */
static int ovl_rmdir(struct inode *dir, struct dentry *dentry)
{
	return ovl_do_remove(dentry, /* is_dir */ true);
}
static int ovl_rename2 ( struct inode * olddir , struct dentry * old ,
struct inode * newdir , struct dentry * new ,
unsigned int flags )
{
int err ;
enum ovl_path_type old_type ;
enum ovl_path_type new_type ;
struct dentry * old_upperdir ;
struct dentry * new_upperdir ;
struct dentry * olddentry ;
struct dentry * newdentry ;
struct dentry * trap ;
bool old_opaque ;
bool new_opaque ;
bool new_create = false ;
bool cleanup_whiteout = false ;
bool overwrite = ! ( flags & RENAME_EXCHANGE ) ;
bool is_dir = S_ISDIR ( old - > d_inode - > i_mode ) ;
bool new_is_dir = false ;
struct dentry * opaquedir = NULL ;
const struct cred * old_cred = NULL ;
struct cred * override_cred = NULL ;
err = - EINVAL ;
if ( flags & ~ ( RENAME_EXCHANGE | RENAME_NOREPLACE ) )
goto out ;
flags & = ~ RENAME_NOREPLACE ;
err = ovl_check_sticky ( old ) ;
if ( err )
goto out ;
/* Don't copy up directory trees */
old_type = ovl_path_type ( old ) ;
err = - EXDEV ;
if ( ( old_type = = OVL_PATH_LOWER | | old_type = = OVL_PATH_MERGE ) & & is_dir )
goto out ;
if ( new - > d_inode ) {
err = ovl_check_sticky ( new ) ;
if ( err )
goto out ;
if ( S_ISDIR ( new - > d_inode - > i_mode ) )
new_is_dir = true ;
new_type = ovl_path_type ( new ) ;
err = - EXDEV ;
if ( ! overwrite & & ( new_type = = OVL_PATH_LOWER | | new_type = = OVL_PATH_MERGE ) & & new_is_dir )
goto out ;
err = 0 ;
if ( new_type = = OVL_PATH_LOWER & & old_type = = OVL_PATH_LOWER ) {
if ( ovl_dentry_lower ( old ) - > d_inode = =
ovl_dentry_lower ( new ) - > d_inode )
goto out ;
}
if ( new_type ! = OVL_PATH_LOWER & & old_type ! = OVL_PATH_LOWER ) {
if ( ovl_dentry_upper ( old ) - > d_inode = =
ovl_dentry_upper ( new ) - > d_inode )
goto out ;
}
} else {
if ( ovl_dentry_is_opaque ( new ) )
new_type = OVL_PATH_UPPER ;
else
new_type = OVL_PATH_PURE_UPPER ;
}
err = ovl_want_write ( old ) ;
if ( err )
goto out ;
err = ovl_copy_up ( old ) ;
if ( err )
goto out_drop_write ;
err = ovl_copy_up ( new - > d_parent ) ;
if ( err )
goto out_drop_write ;
if ( ! overwrite ) {
err = ovl_copy_up ( new ) ;
if ( err )
goto out_drop_write ;
}
old_opaque = old_type ! = OVL_PATH_PURE_UPPER ;
new_opaque = new_type ! = OVL_PATH_PURE_UPPER ;
if ( old_opaque | | new_opaque ) {
err = - ENOMEM ;
override_cred = prepare_creds ( ) ;
if ( ! override_cred )
goto out_drop_write ;
/*
* CAP_SYS_ADMIN for setting xattr on whiteout , opaque dir
* CAP_DAC_OVERRIDE for create in workdir
* CAP_FOWNER for removing whiteout from sticky dir
* CAP_FSETID for chmod of opaque dir
* CAP_CHOWN for chown of opaque dir
*/
cap_raise ( override_cred - > cap_effective , CAP_SYS_ADMIN ) ;
cap_raise ( override_cred - > cap_effective , CAP_DAC_OVERRIDE ) ;
cap_raise ( override_cred - > cap_effective , CAP_FOWNER ) ;
cap_raise ( override_cred - > cap_effective , CAP_FSETID ) ;
cap_raise ( override_cred - > cap_effective , CAP_CHOWN ) ;
old_cred = override_creds ( override_cred ) ;
}
if ( overwrite & & ( new_type = = OVL_PATH_LOWER | | new_type = = OVL_PATH_MERGE ) & & new_is_dir ) {
2014-11-20 16:39:59 +01:00
opaquedir = ovl_check_empty_and_clear ( new ) ;
overlay filesystem
Overlayfs allows one, usually read-write, directory tree to be
overlaid onto another, read-only directory tree. All modifications
go to the upper, writable layer.
This type of mechanism is most often used for live CDs but there's a
wide variety of other uses.
The implementation differs from other "union filesystem"
implementations in that after a file is opened all operations go
directly to the underlying, lower or upper, filesystems. This
simplifies the implementation and allows native performance in these
cases.
The dentry tree is duplicated from the underlying filesystems, this
enables fast cached lookups without adding special support into the
VFS. This uses slightly more memory than union mounts, but dentries
are relatively small.
Currently inodes are duplicated as well, but it is a possible
optimization to share inodes for non-directories.
Opening non directories results in the open forwarded to the
underlying filesystem. This makes the behavior very similar to union
mounts (with the same limitations vs. fchmod/fchown on O_RDONLY file
descriptors).
Usage:
mount -t overlayfs overlayfs -olowerdir=/lower,upperdir=/upper/upper,workdir=/upper/work /overlay
The following cotributions have been folded into this patch:
Neil Brown <neilb@suse.de>:
- minimal remount support
- use correct seek function for directories
- initialise is_real before use
- rename ovl_fill_cache to ovl_dir_read
Felix Fietkau <nbd@openwrt.org>:
- fix a deadlock in ovl_dir_read_merged
- fix a deadlock in ovl_remove_whiteouts
Erez Zadok <ezk@fsl.cs.sunysb.edu>
- fix cleanup after WARN_ON
Sedat Dilek <sedat.dilek@googlemail.com>
- fix up permission to confirm to new API
Robin Dong <hao.bigrat@gmail.com>
- fix possible leak in ovl_new_inode
- create new inode in ovl_link
Andy Whitcroft <apw@canonical.com>
- switch to __inode_permission()
- copy up i_uid/i_gid from the underlying inode
AV:
- ovl_copy_up_locked() - dput(ERR_PTR(...)) on two failure exits
- ovl_clear_empty() - one failure exit forgetting to do unlock_rename(),
lack of check for udir being the parent of upper, dropping and regaining
the lock on udir (which would require _another_ check for parent being
right).
- bogus d_drop() in copyup and rename [fix from your mail]
- copyup/remove and copyup/rename races [fix from your mail]
- ovl_dir_fsync() leaving ERR_PTR() in ->realfile
- ovl_entry_free() is pointless - it's just a kfree_rcu()
- fold ovl_do_lookup() into ovl_lookup()
- manually assigning ->d_op is wrong. Just use ->s_d_op.
[patches picked from Miklos]:
* copyup/remove and copyup/rename races
* bogus d_drop() in copyup and rename
Also thanks to the following people for testing and reporting bugs:
Jordi Pujol <jordipujolp@gmail.com>
Andy Whitcroft <apw@canonical.com>
Michal Suchanek <hramrach@centrum.cz>
Felix Fietkau <nbd@openwrt.org>
Erez Zadok <ezk@fsl.cs.sunysb.edu>
Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
2014-10-24 00:14:38 +02:00
err = PTR_ERR ( opaquedir ) ;
if ( IS_ERR ( opaquedir ) ) {
opaquedir = NULL ;
goto out_revert_creds ;
}
}
if ( overwrite ) {
if ( old_opaque ) {
if ( new - > d_inode | | ! new_opaque ) {
/* Whiteout source */
flags | = RENAME_WHITEOUT ;
} else {
/* Switch whiteouts */
flags | = RENAME_EXCHANGE ;
}
} else if ( is_dir & & ! new - > d_inode & & new_opaque ) {
flags | = RENAME_EXCHANGE ;
cleanup_whiteout = true ;
}
}
old_upperdir = ovl_dentry_upper ( old - > d_parent ) ;
new_upperdir = ovl_dentry_upper ( new - > d_parent ) ;
trap = lock_rename ( new_upperdir , old_upperdir ) ;
olddentry = ovl_dentry_upper ( old ) ;
newdentry = ovl_dentry_upper ( new ) ;
if ( newdentry ) {
if ( opaquedir ) {
newdentry = opaquedir ;
opaquedir = NULL ;
} else {
dget ( newdentry ) ;
}
} else {
new_create = true ;
newdentry = lookup_one_len ( new - > d_name . name , new_upperdir ,
new - > d_name . len ) ;
err = PTR_ERR ( newdentry ) ;
if ( IS_ERR ( newdentry ) )
goto out_unlock ;
}
err = - ESTALE ;
if ( olddentry - > d_parent ! = old_upperdir )
goto out_dput ;
if ( newdentry - > d_parent ! = new_upperdir )
goto out_dput ;
if ( olddentry = = trap )
goto out_dput ;
if ( newdentry = = trap )
goto out_dput ;
if ( is_dir & & ! old_opaque & & new_opaque ) {
err = ovl_set_opaque ( olddentry ) ;
if ( err )
goto out_dput ;
}
if ( ! overwrite & & new_is_dir & & old_opaque & & ! new_opaque ) {
err = ovl_set_opaque ( newdentry ) ;
if ( err )
goto out_dput ;
}
if ( old_opaque | | new_opaque ) {
err = ovl_do_rename ( old_upperdir - > d_inode , olddentry ,
new_upperdir - > d_inode , newdentry ,
flags ) ;
} else {
/* No debug for the plain case */
BUG_ON ( flags & ~ RENAME_EXCHANGE ) ;
err = vfs_rename ( old_upperdir - > d_inode , olddentry ,
new_upperdir - > d_inode , newdentry ,
NULL , flags ) ;
}
if ( err ) {
if ( is_dir & & ! old_opaque & & new_opaque )
ovl_remove_opaque ( olddentry ) ;
if ( ! overwrite & & new_is_dir & & old_opaque & & ! new_opaque )
ovl_remove_opaque ( newdentry ) ;
goto out_dput ;
}
if ( is_dir & & old_opaque & & ! new_opaque )
ovl_remove_opaque ( olddentry ) ;
if ( ! overwrite & & new_is_dir & & ! old_opaque & & new_opaque )
ovl_remove_opaque ( newdentry ) ;
if ( old_opaque ! = new_opaque ) {
ovl_dentry_set_opaque ( old , new_opaque ) ;
if ( ! overwrite )
ovl_dentry_set_opaque ( new , old_opaque ) ;
}
if ( cleanup_whiteout )
ovl_cleanup ( old_upperdir - > d_inode , newdentry ) ;
ovl_dentry_version_inc ( old - > d_parent ) ;
ovl_dentry_version_inc ( new - > d_parent ) ;
out_dput :
dput ( newdentry ) ;
out_unlock :
unlock_rename ( new_upperdir , old_upperdir ) ;
out_revert_creds :
if ( old_opaque | | new_opaque ) {
revert_creds ( old_cred ) ;
put_cred ( override_cred ) ;
}
out_drop_write :
ovl_drop_write ( old ) ;
out :
dput ( opaquedir ) ;
return err ;
}
/*
 * Inode operation table for overlay directories; every entry points at
 * the corresponding ovl_* handler.
 */
const struct inode_operations ovl_dir_inode_operations = {
	/* Name lookup */
	.lookup		= ovl_lookup,

	/* Object creation */
	.create		= ovl_create,
	.mkdir		= ovl_mkdir,
	.mknod		= ovl_mknod,
	.symlink	= ovl_symlink,
	.link		= ovl_link,

	/* Removal and rename */
	.unlink		= ovl_unlink,
	.rmdir		= ovl_rmdir,
	.rename2	= ovl_rename2,

	/* Attributes and permission checks */
	.setattr	= ovl_setattr,
	.getattr	= ovl_dir_getattr,
	.permission	= ovl_permission,

	/* Extended attributes */
	.setxattr	= ovl_setxattr,
	.getxattr	= ovl_getxattr,
	.listxattr	= ovl_listxattr,
	.removexattr	= ovl_removexattr,
};