2005-04-17 02:20:36 +04:00
# define MSNFS /* HACK HACK */
/*
* linux / fs / nfsd / vfs . c
*
* File operations used by nfsd . Some of these have been ripped from
* other parts of the kernel because they weren ' t exported , others
* are partial duplicates with added or changed functionality .
*
* Note that several functions dget ( ) the dentry upon which they want
* to act , most notably those that create directory entries . Response
* dentry ' s are dput ( ) ' d if necessary in the release callback .
* So if you notice code paths that apparently fail to dput ( ) the
* dentry , don ' t worry - - they have been taken care of .
*
* Copyright ( C ) 1995 - 1999 Olaf Kirch < okir @ monad . swb . de >
* Zerocopy NFS support ( C ) 2002 Hirokazu Takahashi < taka @ valinux . co . jp >
*/
# include <linux/string.h>
# include <linux/time.h>
# include <linux/errno.h>
# include <linux/fs.h>
# include <linux/file.h>
# include <linux/mount.h>
# include <linux/major.h>
2007-06-04 11:59:47 +04:00
# include <linux/splice.h>
2005-04-17 02:20:36 +04:00
# include <linux/proc_fs.h>
# include <linux/stat.h>
# include <linux/fcntl.h>
# include <linux/net.h>
# include <linux/unistd.h>
# include <linux/slab.h>
# include <linux/pagemap.h>
# include <linux/in.h>
# include <linux/module.h>
# include <linux/namei.h>
# include <linux/vfs.h>
# include <linux/delay.h>
# include <linux/sunrpc/svc.h>
# include <linux/nfsd/nfsd.h>
# ifdef CONFIG_NFSD_V3
# include <linux/nfs3.h>
# include <linux/nfsd/xdr3.h>
# endif /* CONFIG_NFSD_V3 */
# include <linux/nfsd/nfsfh.h>
# include <linux/quotaops.h>
[PATCH] inotify
inotify is intended to correct the deficiencies of dnotify, particularly
its inability to scale and its terrible user interface:
* dnotify requires the opening of one fd per each directory
that you intend to watch. This quickly results in too many
open files and pins removable media, preventing unmount.
* dnotify is directory-based. You only learn about changes to
directories. Sure, a change to a file in a directory affects
the directory, but you are then forced to keep a cache of
stat structures.
* dnotify's interface to user-space is awful. Signals?
inotify provides a more usable, simple, powerful solution to file change
notification:
* inotify's interface is a system call that returns a fd, not SIGIO.
You get a single fd, which is select()-able.
* inotify has an event that says "the filesystem that the item
you were watching is on was unmounted."
* inotify can watch directories or files.
Inotify is currently used by Beagle (a desktop search infrastructure),
Gamin (a FAM replacement), and other projects.
See Documentation/filesystems/inotify.txt.
Signed-off-by: Robert Love <rml@novell.com>
Cc: John McCutchan <ttb@tentacle.dhs.org>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-07-13 01:06:03 +04:00
# include <linux/fsnotify.h>
2005-04-17 02:20:36 +04:00
# include <linux/posix_acl.h>
# include <linux/posix_acl_xattr.h>
# include <linux/xattr.h>
2006-01-10 07:51:55 +03:00
# ifdef CONFIG_NFSD_V4
2005-04-17 02:20:36 +04:00
# include <linux/nfs4.h>
# include <linux/nfs4_acl.h>
# include <linux/nfsd_idmap.h>
# include <linux/security.h>
# endif /* CONFIG_NFSD_V4 */
2006-10-04 13:15:49 +04:00
# include <linux/jhash.h>
2005-04-17 02:20:36 +04:00
# include <asm/uaccess.h>
# define NFSDDBG_FACILITY NFSDDBG_FILEOP
/*
* This is a cache of readahead params that help us choose the proper
* readahead strategy . Initially , we set all readahead parameters to 0
* and let the VFS handle things .
* The entries hang off the raparm_hash buckets declared below , so the
* earlier advice to add a hash table once the number of cached files
* grows has already been acted on .
*/
/*
 * One cached set of readahead parameters.  Entries are linked through
 * p_next; the bucket type declared after this struct holds a pointer to
 * the first entry of such a chain.
 */
struct raparms {
	struct raparms		*p_next;	/* next entry in the chain */
	unsigned int		p_count;	/* presumably a use count -- TODO confirm against the users */
	ino_t			p_ino;		/* inode number; presumably part of the lookup key -- confirm */
	dev_t			p_dev;		/* device number; presumably part of the lookup key -- confirm */
	int			p_set;		/* NOTE(review): looks like a "p_ra is valid" flag -- confirm */
	struct file_ra_state	p_ra;		/* the saved readahead state */
	unsigned int		p_hindex;	/* presumably the index of the owning hash bucket -- confirm */
};
2006-10-04 13:15:49 +04:00
/*
 * One bucket of the readahead-parameter hash : a pointer to the first
 * struct raparms of the bucket plus a spinlock ( presumably guarding that
 * chain - - confirm against the users ) .
 */
struct raparm_hbucket {
struct raparms * pb_head ;
spinlock_t pb_lock ;
} ____cacheline_aligned_in_smp ;
/* 4 hash bits give 1 << 4 = 16 buckets ; the mask is therefore 15 . */
# define RAPARM_HASH_BITS 4
# define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS)
# define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1)
/* The bucket array itself , RAPARM_HASH_SIZE entries , private to this file . */
static struct raparm_hbucket raparm_hash [ RAPARM_HASH_SIZE ] ;
2005-04-17 02:20:36 +04:00
/*
* Called from nfsd_lookup and encode_dirent . Check if we have crossed
* a mount point .
2006-12-13 11:35:25 +03:00
* Returns - EAGAIN or - ETIMEDOUT leaving * dpp and * expp unchanged ,
2005-04-17 02:20:36 +04:00
* or nfs_ok having possibly changed * dpp and * expp
*/
int
nfsd_cross_mnt ( struct svc_rqst * rqstp , struct dentry * * dpp ,
struct svc_export * * expp )
{
struct svc_export * exp = * expp , * exp2 = NULL ;
struct dentry * dentry = * dpp ;
2008-02-15 06:38:39 +03:00
struct vfsmount * mnt = mntget ( exp - > ex_path . mnt ) ;
2005-04-17 02:20:36 +04:00
struct dentry * mounts = dget ( dentry ) ;
2006-10-20 10:28:58 +04:00
int err = 0 ;
2005-04-17 02:20:36 +04:00
while ( follow_down ( & mnt , & mounts ) & & d_mountpoint ( mounts ) ) ;
2007-07-17 15:04:44 +04:00
exp2 = rqst_exp_get_by_name ( rqstp , mnt , mounts ) ;
2005-04-17 02:20:36 +04:00
if ( IS_ERR ( exp2 ) ) {
2007-09-06 01:22:12 +04:00
if ( PTR_ERR ( exp2 ) ! = - ENOENT )
err = PTR_ERR ( exp2 ) ;
2005-04-17 02:20:36 +04:00
dput ( mounts ) ;
mntput ( mnt ) ;
goto out ;
}
2007-07-19 12:49:19 +04:00
if ( ( exp - > ex_flags & NFSEXP_CROSSMOUNT ) | | EX_NOHIDE ( exp2 ) ) {
2005-04-17 02:20:36 +04:00
/* successfully crossed mount point */
2009-04-18 10:32:31 +04:00
/*
* This is subtle : dentry is * not * under mnt at this point .
* The only reason we are safe is that original mnt is pinned
* down by exp , so we should dput before putting exp .
*/
2005-04-17 02:20:36 +04:00
dput ( dentry ) ;
* dpp = mounts ;
2009-04-18 10:32:31 +04:00
exp_put ( exp ) ;
* expp = exp2 ;
2005-04-17 02:20:36 +04:00
} else {
2007-07-19 12:49:19 +04:00
exp_put ( exp2 ) ;
2005-04-17 02:20:36 +04:00
dput ( mounts ) ;
}
mntput ( mnt ) ;
out :
return err ;
}
2006-10-20 10:28:58 +04:00
__be32
2007-07-17 15:04:47 +04:00
nfsd_lookup_dentry ( struct svc_rqst * rqstp , struct svc_fh * fhp ,
2007-11-01 23:57:09 +03:00
const char * name , unsigned int len ,
2007-07-17 15:04:47 +04:00
struct svc_export * * exp_ret , struct dentry * * dentry_ret )
2005-04-17 02:20:36 +04:00
{
struct svc_export * exp ;
struct dentry * dparent ;
struct dentry * dentry ;
2006-10-20 10:28:58 +04:00
__be32 err ;
int host_err ;
2005-04-17 02:20:36 +04:00
dprintk ( " nfsd: nfsd_lookup(fh %s, %.*s) \n " , SVCFH_fmt ( fhp ) , len , name ) ;
/* Obtain dentry and export. */
2008-06-16 15:20:29 +04:00
err = fh_verify ( rqstp , fhp , S_IFDIR , NFSD_MAY_EXEC ) ;
2005-04-17 02:20:36 +04:00
if ( err )
return err ;
dparent = fhp - > fh_dentry ;
exp = fhp - > fh_export ;
exp_get ( exp ) ;
/* Lookup the name, but don't follow links */
if ( isdotent ( name , len ) ) {
if ( len = = 1 )
dentry = dget ( dparent ) ;
2008-02-15 06:38:39 +03:00
else if ( dparent ! = exp - > ex_path . dentry )
2005-04-17 02:20:36 +04:00
dentry = dget_parent ( dparent ) ;
2008-02-15 06:38:39 +03:00
else if ( ! EX_NOHIDE ( exp ) )
2005-04-17 02:20:36 +04:00
dentry = dget ( dparent ) ; /* .. == . just like at / */
else {
/* checking mountpoint crossing is very different when stepping up */
struct svc_export * exp2 = NULL ;
struct dentry * dp ;
2008-02-15 06:38:39 +03:00
struct vfsmount * mnt = mntget ( exp - > ex_path . mnt ) ;
2005-04-17 02:20:36 +04:00
dentry = dget ( dparent ) ;
while ( dentry = = mnt - > mnt_root & & follow_up ( & mnt , & dentry ) )
;
dp = dget_parent ( dentry ) ;
dput ( dentry ) ;
dentry = dp ;
2007-07-17 15:04:44 +04:00
exp2 = rqst_exp_parent ( rqstp , mnt , dentry ) ;
2007-07-17 15:04:40 +04:00
if ( PTR_ERR ( exp2 ) = = - ENOENT ) {
dput ( dentry ) ;
dentry = dget ( dparent ) ;
} else if ( IS_ERR ( exp2 ) ) {
2006-10-20 10:28:58 +04:00
host_err = PTR_ERR ( exp2 ) ;
2005-04-17 02:20:36 +04:00
dput ( dentry ) ;
mntput ( mnt ) ;
goto out_nfserr ;
} else {
exp_put ( exp ) ;
exp = exp2 ;
}
mntput ( mnt ) ;
}
} else {
fh_lock ( fhp ) ;
dentry = lookup_one_len ( name , dparent , len ) ;
2006-10-20 10:28:58 +04:00
host_err = PTR_ERR ( dentry ) ;
2005-04-17 02:20:36 +04:00
if ( IS_ERR ( dentry ) )
goto out_nfserr ;
/*
* check if we have crossed a mount point . . .
*/
if ( d_mountpoint ( dentry ) ) {
2006-10-20 10:28:58 +04:00
if ( ( host_err = nfsd_cross_mnt ( rqstp , & dentry , & exp ) ) ) {
2005-04-17 02:20:36 +04:00
dput ( dentry ) ;
goto out_nfserr ;
}
}
}
2007-07-17 15:04:47 +04:00
* dentry_ret = dentry ;
* exp_ret = exp ;
return 0 ;
out_nfserr :
exp_put ( exp ) ;
return nfserrno ( host_err ) ;
}
/*
* Look up one component of a pathname .
* N . B . After this call _both_ fhp and resfh need an fh_put
*
* If the lookup would cross a mountpoint , and the mounted filesystem
* is exported to the client with NFSEXP_NOHIDE , then the lookup is
* accepted as it stands and the mounted directory is
* returned . Otherwise the covered directory is returned .
* NOTE : this mountpoint crossing is not supported properly by all
* clients and is explicitly disallowed for NFSv3
* NeilBrown < neilb @ cse . unsw . edu . au >
*/
__be32
nfsd_lookup ( struct svc_rqst * rqstp , struct svc_fh * fhp , const char * name ,
2007-11-01 23:57:09 +03:00
unsigned int len , struct svc_fh * resfh )
2007-07-17 15:04:47 +04:00
{
struct svc_export * exp ;
struct dentry * dentry ;
__be32 err ;
err = nfsd_lookup_dentry ( rqstp , fhp , name , len , & exp , & dentry ) ;
if ( err )
return err ;
2007-07-17 15:04:48 +04:00
err = check_nfsd_access ( exp , rqstp ) ;
if ( err )
goto out ;
2005-04-17 02:20:36 +04:00
/*
* Note : we compose the file handle now , but as the
* dentry may be negative , it may need to be updated .
*/
err = fh_compose ( resfh , exp , dentry , fhp ) ;
if ( ! err & & ! dentry - > d_inode )
err = nfserr_noent ;
2007-07-17 15:04:48 +04:00
out :
2005-04-17 02:20:36 +04:00
dput ( dentry ) ;
exp_put ( exp ) ;
return err ;
}
2007-07-17 15:04:47 +04:00
2005-04-17 02:20:36 +04:00
/*
* Set various file attributes .
* N . B . After this call fhp needs an fh_put
*/
2006-10-20 10:28:58 +04:00
__be32
2005-04-17 02:20:36 +04:00
nfsd_setattr ( struct svc_rqst * rqstp , struct svc_fh * fhp , struct iattr * iap ,
int check_guard , time_t guardtime )
{
struct dentry * dentry ;
struct inode * inode ;
2008-06-16 15:20:29 +04:00
int accmode = NFSD_MAY_SATTR ;
2005-04-17 02:20:36 +04:00
int ftype = 0 ;
2006-10-20 10:28:58 +04:00
__be32 err ;
int host_err ;
2005-04-17 02:20:36 +04:00
int size_change = 0 ;
if ( iap - > ia_valid & ( ATTR_ATIME | ATTR_MTIME | ATTR_SIZE ) )
2008-06-16 15:20:29 +04:00
accmode | = NFSD_MAY_WRITE | NFSD_MAY_OWNER_OVERRIDE ;
2005-04-17 02:20:36 +04:00
if ( iap - > ia_valid & ATTR_SIZE )
ftype = S_IFREG ;
/* Get inode */
err = fh_verify ( rqstp , fhp , ftype , accmode ) ;
2005-11-07 12:00:23 +03:00
if ( err )
2005-04-17 02:20:36 +04:00
goto out ;
dentry = fhp - > fh_dentry ;
inode = dentry - > d_inode ;
2005-11-07 12:00:23 +03:00
/* Ignore any mode updates on symlinks */
if ( S_ISLNK ( inode - > i_mode ) )
iap - > ia_valid & = ~ ATTR_MODE ;
if ( ! iap - > ia_valid )
goto out ;
2007-03-07 18:26:25 +03:00
/*
* NFSv2 does not differentiate between " set-[ac]time-to-now "
2005-04-17 02:20:36 +04:00
* which only requires access , and " set-[ac]time-to-X " which
* requires ownership .
* So if it looks like it might be " set both to the same time which
* is close to now " , and if inode_change_ok fails, then we
* convert to " set to now " instead of " set to explicit time "
*
* We only call inode_change_ok as the last test as technically
* it is not an interface that we should be using . It is only
* valid if the filesystem does not define it ' s own i_op - > setattr .
*/
# define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET)
# define MAX_TOUCH_TIME_ERROR (30*60)
2007-03-07 18:26:25 +03:00
if ( ( iap - > ia_valid & BOTH_TIME_SET ) = = BOTH_TIME_SET & &
iap - > ia_mtime . tv_sec = = iap - > ia_atime . tv_sec ) {
/*
* Looks probable .
*
* Now just make sure time is in the right ballpark .
* Solaris , at least , doesn ' t seem to care what the time
* request is . We require it be within 30 minutes of now .
2005-04-17 02:20:36 +04:00
*/
2007-03-07 18:26:25 +03:00
time_t delta = iap - > ia_atime . tv_sec - get_seconds ( ) ;
if ( delta < 0 )
delta = - delta ;
if ( delta < MAX_TOUCH_TIME_ERROR & &
inode_change_ok ( inode , iap ) ! = 0 ) {
/*
* Turn off ATTR_ [ AM ] TIME_SET but leave ATTR_ [ AM ] TIME .
* This will cause notify_change to set these times
* to " now "
*/
iap - > ia_valid & = ~ BOTH_TIME_SET ;
}
2005-04-17 02:20:36 +04:00
}
2007-03-07 18:26:25 +03:00
/*
* The size case is special .
* It changes the file as well as the attributes .
*/
2005-04-17 02:20:36 +04:00
if ( iap - > ia_valid & ATTR_SIZE ) {
if ( iap - > ia_size < inode - > i_size ) {
2008-06-16 15:20:29 +04:00
err = nfsd_permission ( rqstp , fhp - > fh_export , dentry ,
NFSD_MAY_TRUNC | NFSD_MAY_OWNER_OVERRIDE ) ;
2005-04-17 02:20:36 +04:00
if ( err )
goto out ;
}
/*
* If we are changing the size of the file , then
* we need to break all leases .
*/
2006-10-20 10:28:58 +04:00
host_err = break_lease ( inode , FMODE_WRITE | O_NONBLOCK ) ;
if ( host_err = = - EWOULDBLOCK )
host_err = - ETIMEDOUT ;
if ( host_err ) /* ENOMEM or EWOULDBLOCK */
2005-04-17 02:20:36 +04:00
goto out_nfserr ;
2006-10-20 10:28:58 +04:00
host_err = get_write_access ( inode ) ;
if ( host_err )
2005-04-17 02:20:36 +04:00
goto out_nfserr ;
size_change = 1 ;
2006-10-20 10:28:58 +04:00
host_err = locks_verify_truncate ( inode , NULL , iap - > ia_size ) ;
if ( host_err ) {
2005-04-17 02:20:36 +04:00
put_write_access ( inode ) ;
goto out_nfserr ;
}
2009-02-10 00:22:21 +03:00
vfs_dq_init ( inode ) ;
2005-04-17 02:20:36 +04:00
}
knfsd: clear both setuid and setgid whenever a chown is done
Currently, knfsd only clears the setuid bit if the owner of a file is
changed on a SETATTR call, and only clears the setgid bit if the group
is changed. POSIX says this in the spec for chown():
"If the specified file is a regular file, one or more of the
S_IXUSR, S_IXGRP, or S_IXOTH bits of the file mode are set, and the
process does not have appropriate privileges, the set-user-ID
(S_ISUID) and set-group-ID (S_ISGID) bits of the file mode shall
be cleared upon successful return from chown()."
If I'm reading this correctly, then knfsd is doing this wrong. It should
be clearing both the setuid and setgid bit on any SETATTR that changes
the uid or gid. This wasn't really as noticable before, but now that the
ATTR_KILL_S*ID bits are a no-op for the NFS client, it's more evident.
This patch corrects the nfsd_setattr logic so that this occurs. It also
does a bit of cleanup to the function.
There is also one small behavioral change. If a SETATTR call comes in
that changes the uid/gid and the mode, then we now only clear the setgid
bit if the group execute bit isn't set. The setgid bit without a group
execute bit signifies mandatory locking and we likely don't want to
clear the bit in that case. Since there is no call in POSIX that should
generate a SETATTR call like this, then this should rarely happen, but
it's worth noting.
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
2008-04-17 00:28:47 +04:00
/* sanitize the mode change */
2005-04-17 02:20:36 +04:00
if ( iap - > ia_valid & ATTR_MODE ) {
iap - > ia_mode & = S_IALLUGO ;
2008-04-17 00:28:46 +04:00
iap - > ia_mode | = ( inode - > i_mode & ~ S_IALLUGO ) ;
knfsd: clear both setuid and setgid whenever a chown is done
Currently, knfsd only clears the setuid bit if the owner of a file is
changed on a SETATTR call, and only clears the setgid bit if the group
is changed. POSIX says this in the spec for chown():
"If the specified file is a regular file, one or more of the
S_IXUSR, S_IXGRP, or S_IXOTH bits of the file mode are set, and the
process does not have appropriate privileges, the set-user-ID
(S_ISUID) and set-group-ID (S_ISGID) bits of the file mode shall
be cleared upon successful return from chown()."
If I'm reading this correctly, then knfsd is doing this wrong. It should
be clearing both the setuid and setgid bit on any SETATTR that changes
the uid or gid. This wasn't really as noticable before, but now that the
ATTR_KILL_S*ID bits are a no-op for the NFS client, it's more evident.
This patch corrects the nfsd_setattr logic so that this occurs. It also
does a bit of cleanup to the function.
There is also one small behavioral change. If a SETATTR call comes in
that changes the uid/gid and the mode, then we now only clear the setgid
bit if the group execute bit isn't set. The setgid bit without a group
execute bit signifies mandatory locking and we likely don't want to
clear the bit in that case. Since there is no call in POSIX that should
generate a SETATTR call like this, then this should rarely happen, but
it's worth noting.
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
2008-04-17 00:28:47 +04:00
}
/* Revoke setuid/setgid on chown */
Inconsistent setattr behaviour
There is an inconsistency seen in the behaviour of nfs compared to other local
filesystems on linux when changing owner or group of a directory. If the
directory has SUID/SGID flags set, on changing owner or group on the directory,
the flags are stripped off on nfs. These flags are maintained on other
filesystems such as ext3.
To reproduce on a nfs share or local filesystem, run the following commands
mkdir test; chmod +s+g test; chown user1 test; ls -ld test
On the nfs share, the flags are stripped and the output seen is
drwxr-xr-x 2 user1 root 4096 Feb 23 2009 test
On other local filesystems(ex: ext3), the flags are not stripped and the output
seen is
drwsr-sr-x 2 user1 root 4096 Feb 23 13:57 test
chown_common() called from sys_chown() will only strip the flags if the inode is
not a directory.
static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
{
..
if (!S_ISDIR(inode->i_mode))
newattrs.ia_valid |=
ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
..
}
See: http://www.opengroup.org/onlinepubs/7990989775/xsh/chown.html
"If the path argument refers to a regular file, the set-user-ID (S_ISUID) and
set-group-ID (S_ISGID) bits of the file mode are cleared upon successful return
from chown(), unless the call is made by a process with appropriate privileges,
in which case it is implementation-dependent whether these bits are altered. If
chown() is successfully invoked on a file that is not a regular file, these
bits may be cleared. These bits are defined in <sys/stat.h>."
The behaviour as it stands does not appear to violate POSIX. However the
actions performed are inconsistent when comparing ext3 and nfs.
Signed-off-by: Sachin Prabhu <sprabhu@redhat.com>
Acked-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
2009-02-23 19:22:03 +03:00
if ( ! S_ISDIR ( inode - > i_mode ) & &
( ( ( iap - > ia_valid & ATTR_UID ) & & iap - > ia_uid ! = inode - > i_uid ) | |
( ( iap - > ia_valid & ATTR_GID ) & & iap - > ia_gid ! = inode - > i_gid ) ) ) {
knfsd: clear both setuid and setgid whenever a chown is done
Currently, knfsd only clears the setuid bit if the owner of a file is
changed on a SETATTR call, and only clears the setgid bit if the group
is changed. POSIX says this in the spec for chown():
"If the specified file is a regular file, one or more of the
S_IXUSR, S_IXGRP, or S_IXOTH bits of the file mode are set, and the
process does not have appropriate privileges, the set-user-ID
(S_ISUID) and set-group-ID (S_ISGID) bits of the file mode shall
be cleared upon successful return from chown()."
If I'm reading this correctly, then knfsd is doing this wrong. It should
be clearing both the setuid and setgid bit on any SETATTR that changes
the uid or gid. This wasn't really as noticable before, but now that the
ATTR_KILL_S*ID bits are a no-op for the NFS client, it's more evident.
This patch corrects the nfsd_setattr logic so that this occurs. It also
does a bit of cleanup to the function.
There is also one small behavioral change. If a SETATTR call comes in
that changes the uid/gid and the mode, then we now only clear the setgid
bit if the group execute bit isn't set. The setgid bit without a group
execute bit signifies mandatory locking and we likely don't want to
clear the bit in that case. Since there is no call in POSIX that should
generate a SETATTR call like this, then this should rarely happen, but
it's worth noting.
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
2008-04-17 00:28:47 +04:00
iap - > ia_valid | = ATTR_KILL_PRIV ;
if ( iap - > ia_valid & ATTR_MODE ) {
/* we're setting mode too, just clear the s*id bits */
2007-10-18 14:05:19 +04:00
iap - > ia_mode & = ~ S_ISUID ;
knfsd: clear both setuid and setgid whenever a chown is done
Currently, knfsd only clears the setuid bit if the owner of a file is
changed on a SETATTR call, and only clears the setgid bit if the group
is changed. POSIX says this in the spec for chown():
"If the specified file is a regular file, one or more of the
S_IXUSR, S_IXGRP, or S_IXOTH bits of the file mode are set, and the
process does not have appropriate privileges, the set-user-ID
(S_ISUID) and set-group-ID (S_ISGID) bits of the file mode shall
be cleared upon successful return from chown()."
If I'm reading this correctly, then knfsd is doing this wrong. It should
be clearing both the setuid and setgid bit on any SETATTR that changes
the uid or gid. This wasn't really as noticable before, but now that the
ATTR_KILL_S*ID bits are a no-op for the NFS client, it's more evident.
This patch corrects the nfsd_setattr logic so that this occurs. It also
does a bit of cleanup to the function.
There is also one small behavioral change. If a SETATTR call comes in
that changes the uid/gid and the mode, then we now only clear the setgid
bit if the group execute bit isn't set. The setgid bit without a group
execute bit signifies mandatory locking and we likely don't want to
clear the bit in that case. Since there is no call in POSIX that should
generate a SETATTR call like this, then this should rarely happen, but
it's worth noting.
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
2008-04-17 00:28:47 +04:00
if ( iap - > ia_mode & S_IXGRP )
iap - > ia_mode & = ~ S_ISGID ;
} else {
/* set ATTR_KILL_* bits and let VFS handle it */
iap - > ia_valid | = ( ATTR_KILL_SUID | ATTR_KILL_SGID ) ;
2007-10-18 14:05:19 +04:00
}
2005-04-17 02:20:36 +04:00
}
/* Change the attributes. */
iap - > ia_valid | = ATTR_CTIME ;
err = nfserr_notsync ;
if ( ! check_guard | | guardtime = = inode - > i_ctime . tv_sec ) {
fh_lock ( fhp ) ;
2006-10-20 10:28:58 +04:00
host_err = notify_change ( dentry , iap ) ;
err = nfserrno ( host_err ) ;
2005-04-17 02:20:36 +04:00
fh_unlock ( fhp ) ;
}
if ( size_change )
put_write_access ( inode ) ;
if ( ! err )
if ( EX_ISSYNC ( fhp - > fh_export ) )
write_inode_now ( inode , 1 ) ;
out :
return err ;
out_nfserr :
2006-10-20 10:28:58 +04:00
err = nfserrno ( host_err ) ;
2005-04-17 02:20:36 +04:00
goto out ;
}
2006-01-10 07:51:55 +03:00
# if defined(CONFIG_NFSD_V2_ACL) || \
defined ( CONFIG_NFSD_V3_ACL ) | | \
defined ( CONFIG_NFSD_V4 )
static ssize_t nfsd_getxattr ( struct dentry * dentry , char * key , void * * buf )
{
ssize_t buflen ;
2008-10-22 13:18:36 +04:00
ssize_t ret ;
2006-01-10 07:51:55 +03:00
buflen = vfs_getxattr ( dentry , key , NULL , 0 ) ;
if ( buflen < = 0 )
return buflen ;
2005-04-17 02:20:36 +04:00
2006-01-10 07:51:55 +03:00
* buf = kmalloc ( buflen , GFP_KERNEL ) ;
if ( ! * buf )
return - ENOMEM ;
2008-10-22 13:18:36 +04:00
ret = vfs_getxattr ( dentry , key , * buf , buflen ) ;
if ( ret < 0 )
kfree ( * buf ) ;
return ret ;
2006-01-10 07:51:55 +03:00
}
# endif
# if defined(CONFIG_NFSD_V4)
2005-04-17 02:20:36 +04:00
static int
set_nfsv4_acl_one ( struct dentry * dentry , struct posix_acl * pacl , char * key )
{
int len ;
size_t buflen ;
char * buf = NULL ;
int error = 0 ;
buflen = posix_acl_xattr_size ( pacl - > a_count ) ;
buf = kmalloc ( buflen , GFP_KERNEL ) ;
error = - ENOMEM ;
if ( buf = = NULL )
goto out ;
len = posix_acl_to_xattr ( pacl , buf , buflen ) ;
if ( len < 0 ) {
error = len ;
goto out ;
}
2006-01-10 07:51:55 +03:00
error = vfs_setxattr ( dentry , key , buf , len , 0 ) ;
2005-04-17 02:20:36 +04:00
out :
kfree ( buf ) ;
return error ;
}
2006-10-20 10:28:58 +04:00
__be32
2005-04-17 02:20:36 +04:00
nfsd4_set_nfs4_acl ( struct svc_rqst * rqstp , struct svc_fh * fhp ,
struct nfs4_acl * acl )
{
2006-10-20 10:28:58 +04:00
__be32 error ;
int host_error ;
2005-04-17 02:20:36 +04:00
struct dentry * dentry ;
struct inode * inode ;
struct posix_acl * pacl = NULL , * dpacl = NULL ;
unsigned int flags = 0 ;
/* Get inode */
2008-06-16 15:20:29 +04:00
error = fh_verify ( rqstp , fhp , 0 /* S_IFREG */ , NFSD_MAY_SATTR ) ;
2005-04-17 02:20:36 +04:00
if ( error )
2007-07-17 15:04:37 +04:00
return error ;
2005-04-17 02:20:36 +04:00
dentry = fhp - > fh_dentry ;
inode = dentry - > d_inode ;
if ( S_ISDIR ( inode - > i_mode ) )
flags = NFS4_ACL_DIR ;
2006-10-20 10:28:58 +04:00
host_error = nfs4_acl_nfsv4_to_posix ( acl , & pacl , & dpacl , flags ) ;
if ( host_error = = - EINVAL ) {
2007-07-17 15:04:37 +04:00
return nfserr_attrnotsupp ;
2006-10-20 10:28:58 +04:00
} else if ( host_error < 0 )
2005-04-17 02:20:36 +04:00
goto out_nfserr ;
2006-10-20 10:28:58 +04:00
host_error = set_nfsv4_acl_one ( dentry , pacl , POSIX_ACL_XATTR_ACCESS ) ;
if ( host_error < 0 )
2007-07-17 15:04:37 +04:00
goto out_release ;
2005-04-17 02:20:36 +04:00
2007-07-17 15:04:37 +04:00
if ( S_ISDIR ( inode - > i_mode ) )
2006-10-20 10:28:58 +04:00
host_error = set_nfsv4_acl_one ( dentry , dpacl , POSIX_ACL_XATTR_DEFAULT ) ;
2005-04-17 02:20:36 +04:00
2007-07-17 15:04:37 +04:00
out_release :
2005-04-17 02:20:36 +04:00
posix_acl_release ( pacl ) ;
posix_acl_release ( dpacl ) ;
out_nfserr :
2007-02-16 12:28:34 +03:00
if ( host_error = = - EOPNOTSUPP )
2007-07-17 15:04:37 +04:00
return nfserr_attrnotsupp ;
2007-02-16 12:28:34 +03:00
else
2007-07-17 15:04:37 +04:00
return nfserrno ( host_error ) ;
2005-04-17 02:20:36 +04:00
}
static struct posix_acl *
_get_posix_acl ( struct dentry * dentry , char * key )
{
2006-01-10 07:51:55 +03:00
void * buf = NULL ;
2005-04-17 02:20:36 +04:00
struct posix_acl * pacl = NULL ;
2006-01-10 07:51:55 +03:00
int buflen ;
2005-04-17 02:20:36 +04:00
2006-01-10 07:51:55 +03:00
buflen = nfsd_getxattr ( dentry , key , & buf ) ;
if ( ! buflen )
buflen = - ENODATA ;
if ( buflen < = 0 )
return ERR_PTR ( buflen ) ;
2005-04-17 02:20:36 +04:00
pacl = posix_acl_from_xattr ( buf , buflen ) ;
kfree ( buf ) ;
return pacl ;
}
int
nfsd4_get_nfs4_acl ( struct svc_rqst * rqstp , struct dentry * dentry , struct nfs4_acl * * acl )
{
struct inode * inode = dentry - > d_inode ;
int error = 0 ;
struct posix_acl * pacl = NULL , * dpacl = NULL ;
unsigned int flags = 0 ;
2005-06-23 11:10:19 +04:00
pacl = _get_posix_acl ( dentry , POSIX_ACL_XATTR_ACCESS ) ;
2005-04-17 02:20:36 +04:00
if ( IS_ERR ( pacl ) & & PTR_ERR ( pacl ) = = - ENODATA )
pacl = posix_acl_from_mode ( inode - > i_mode , GFP_KERNEL ) ;
if ( IS_ERR ( pacl ) ) {
error = PTR_ERR ( pacl ) ;
pacl = NULL ;
goto out ;
}
if ( S_ISDIR ( inode - > i_mode ) ) {
2005-06-23 11:10:19 +04:00
dpacl = _get_posix_acl ( dentry , POSIX_ACL_XATTR_DEFAULT ) ;
2005-04-17 02:20:36 +04:00
if ( IS_ERR ( dpacl ) & & PTR_ERR ( dpacl ) = = - ENODATA )
dpacl = NULL ;
else if ( IS_ERR ( dpacl ) ) {
error = PTR_ERR ( dpacl ) ;
dpacl = NULL ;
goto out ;
}
flags = NFS4_ACL_DIR ;
}
* acl = nfs4_acl_posix_to_nfsv4 ( pacl , dpacl , flags ) ;
if ( IS_ERR ( * acl ) ) {
error = PTR_ERR ( * acl ) ;
* acl = NULL ;
}
out :
posix_acl_release ( pacl ) ;
posix_acl_release ( dpacl ) ;
return error ;
}
# endif /* defined(CONFIG_NFSD_V4) */
# ifdef CONFIG_NFSD_V3
/*
* Check server access rights to a file system object
*/
/*
 * Pairs one NFSv3 ACCESS bit with the permission mask that nfsd_access ( )
 * passes to nfsd_permission ( ) when the client asks about that bit .
 * Tables of these are terminated by an entry whose access field is 0 .
 */
struct accessmap {
u32 access ; /* NFS3_ACCESS_* bit */
int how ; /* NFSD_MAY_* mask checked for that bit */
} ;
static struct accessmap nfs3_regaccess [ ] = {
2008-06-16 15:20:29 +04:00
{ NFS3_ACCESS_READ , NFSD_MAY_READ } ,
{ NFS3_ACCESS_EXECUTE , NFSD_MAY_EXEC } ,
{ NFS3_ACCESS_MODIFY , NFSD_MAY_WRITE | NFSD_MAY_TRUNC } ,
{ NFS3_ACCESS_EXTEND , NFSD_MAY_WRITE } ,
2005-04-17 02:20:36 +04:00
{ 0 , 0 }
} ;
static struct accessmap nfs3_diraccess [ ] = {
2008-06-16 15:20:29 +04:00
{ NFS3_ACCESS_READ , NFSD_MAY_READ } ,
{ NFS3_ACCESS_LOOKUP , NFSD_MAY_EXEC } ,
{ NFS3_ACCESS_MODIFY , NFSD_MAY_EXEC | NFSD_MAY_WRITE | NFSD_MAY_TRUNC } ,
{ NFS3_ACCESS_EXTEND , NFSD_MAY_EXEC | NFSD_MAY_WRITE } ,
{ NFS3_ACCESS_DELETE , NFSD_MAY_REMOVE } ,
2005-04-17 02:20:36 +04:00
{ 0 , 0 }
} ;
static struct accessmap nfs3_anyaccess [ ] = {
/* Some clients - Solaris 2.6 at least, make an access call
* to the server to check for access for things like / dev / null
* ( which really , the server doesn ' t care about ) . So
* We provide simple access checking for them , looking
* mainly at mode bits , and we make sure to ignore read - only
* filesystem checks
*/
2008-06-16 15:20:29 +04:00
{ NFS3_ACCESS_READ , NFSD_MAY_READ } ,
{ NFS3_ACCESS_EXECUTE , NFSD_MAY_EXEC } ,
{ NFS3_ACCESS_MODIFY , NFSD_MAY_WRITE | NFSD_MAY_LOCAL_ACCESS } ,
{ NFS3_ACCESS_EXTEND , NFSD_MAY_WRITE | NFSD_MAY_LOCAL_ACCESS } ,
2005-04-17 02:20:36 +04:00
{ 0 , 0 }
} ;
2006-10-20 10:28:58 +04:00
__be32
2005-04-17 02:20:36 +04:00
nfsd_access ( struct svc_rqst * rqstp , struct svc_fh * fhp , u32 * access , u32 * supported )
{
struct accessmap * map ;
struct svc_export * export ;
struct dentry * dentry ;
u32 query , result = 0 , sresult = 0 ;
2006-10-20 10:28:58 +04:00
__be32 error ;
2005-04-17 02:20:36 +04:00
2008-06-16 15:20:29 +04:00
error = fh_verify ( rqstp , fhp , 0 , NFSD_MAY_NOP ) ;
2005-04-17 02:20:36 +04:00
if ( error )
goto out ;
export = fhp - > fh_export ;
dentry = fhp - > fh_dentry ;
if ( S_ISREG ( dentry - > d_inode - > i_mode ) )
map = nfs3_regaccess ;
else if ( S_ISDIR ( dentry - > d_inode - > i_mode ) )
map = nfs3_diraccess ;
else
map = nfs3_anyaccess ;
query = * access ;
for ( ; map - > access ; map + + ) {
if ( map - > access & query ) {
2006-10-20 10:28:58 +04:00
__be32 err2 ;
2005-04-17 02:20:36 +04:00
sresult | = map - > access ;
2007-07-17 15:04:48 +04:00
err2 = nfsd_permission ( rqstp , export , dentry , map - > how ) ;
2005-04-17 02:20:36 +04:00
switch ( err2 ) {
case nfs_ok :
result | = map - > access ;
break ;
/* the following error codes just mean the access was not allowed,
* rather than an error occurred */
case nfserr_rofs :
case nfserr_acces :
case nfserr_perm :
/* simply don't "or" in the access bit. */
break ;
default :
error = err2 ;
goto out ;
}
}
}
* access = result ;
if ( supported )
* supported = sresult ;
out :
return error ;
}
# endif /* CONFIG_NFSD_V3 */
/*
 * Open an existing file or directory.
 *
 * rqstp/fhp: request and file handle; fhp is verified here with
 *            fh_verify(), so after this call fhp needs an fh_put.
 * type:      passed straight to fh_verify().
 * access:    NFSD_MAY_* mask describing the kind of open (read/write/lock).
 * filp:      receives the result of dentry_open().  If dentry_open() fails,
 *            *filp is left holding the ERR_PTR value.
 *
 * Returns 0 from fh_verify()/nfserrno() on success, nfserr_perm for the
 * local policy checks below, or nfserrno() of the host error otherwise.
 */
2006-10-20 10:28:58 +04:00
__be32
2005-04-17 02:20:36 +04:00
nfsd_open ( struct svc_rqst * rqstp , struct svc_fh * fhp , int type ,
int access , struct file * * filp )
{
2008-11-14 02:39:22 +03:00
const struct cred * cred = current_cred ( ) ;
2005-04-17 02:20:36 +04:00
struct dentry * dentry ;
struct inode * inode ;
2006-10-20 10:28:58 +04:00
/* default open mode; upgraded below when write access is requested */
int flags = O_RDONLY | O_LARGEFILE ;
__be32 err ;
int host_err ;
2005-04-17 02:20:36 +04:00
/*
 * If we get here, then the client has already done an "open",
 * and (hopefully) checked permission - so allow OWNER_OVERRIDE
 * in case a chmod has now revoked permission.
 */
2008-06-16 15:20:29 +04:00
err = fh_verify ( rqstp , fhp , type , access | NFSD_MAY_OWNER_OVERRIDE ) ;
2005-04-17 02:20:36 +04:00
if ( err )
goto out ;
dentry = fhp - > fh_dentry ;
inode = dentry - > d_inode ;
/*
 * Disallow write access to files with the append-only bit set,
 * or any access when mandatory locking is enabled.
 * err is preset so each of the three checks below can bail out
 * with nfserr_perm via a plain "goto out".
 */
err = nfserr_perm ;
2008-06-16 15:20:29 +04:00
if ( IS_APPEND ( inode ) & & ( access & NFSD_MAY_WRITE ) )
2005-04-17 02:20:36 +04:00
goto out ;
2007-10-02 22:18:12 +04:00
/*
 * We must ignore files (but only files) which might have mandatory
 * locks on them because there is no way to know if the accesser has
 * the lock.
 */
if ( S_ISREG ( ( inode ) - > i_mode ) & & mandatory_lock ( inode ) )
2005-04-17 02:20:36 +04:00
goto out ;
/* no file operations at all: nothing we could open */
if ( ! inode - > i_fop )
goto out ;
/*
 * Check to see if there are any leases on this file.
 * break_lease() is called with O_NONBLOCK; a -EWOULDBLOCK result is
 * reported to the caller as -ETIMEDOUT instead.
 */
2008-06-16 15:20:29 +04:00
host_err = break_lease ( inode , O_NONBLOCK | ( ( access & NFSD_MAY_WRITE ) ? FMODE_WRITE : 0 ) ) ;
2006-10-20 10:28:58 +04:00
if ( host_err = = - EWOULDBLOCK )
host_err = - ETIMEDOUT ;
if ( host_err ) /* NOMEM or WOULDBLOCK */
2005-04-17 02:20:36 +04:00
goto out_nfserr ;
2008-06-16 15:20:29 +04:00
/* write access: open read-write if read was requested too, else write-only */
if ( access & NFSD_MAY_WRITE ) {
if ( access & NFSD_MAY_READ )
2006-06-30 12:56:17 +04:00
flags = O_RDWR | O_LARGEFILE ;
else
flags = O_WRONLY | O_LARGEFILE ;
2005-04-17 02:20:36 +04:00
2009-02-10 00:22:21 +03:00
vfs_dq_init ( inode ) ;
2005-04-17 02:20:36 +04:00
}
2008-02-15 06:38:39 +03:00
/* dentry_open() is handed its own dentry and vfsmount references */
* filp = dentry_open ( dget ( dentry ) , mntget ( fhp - > fh_export - > ex_path . mnt ) ,
2008-11-14 02:39:22 +03:00
flags , cred ) ;
2005-04-17 02:20:36 +04:00
if ( IS_ERR ( * filp ) )
2006-10-20 10:28:58 +04:00
host_err = PTR_ERR ( * filp ) ;
2005-04-17 02:20:36 +04:00
/*
 * The success path falls through to here as well: host_err is then
 * still the 0 returned by break_lease(), so err becomes nfserrno(0).
 */
out_nfserr :
2006-10-20 10:28:58 +04:00
err = nfserrno ( host_err ) ;
2005-04-17 02:20:36 +04:00
out :
return err ;
}
/*
 * Close a file: release the reference held on @filp with fput().
 */
void nfsd_close(struct file *filp)
{
	fput(filp);
}
2009-01-06 21:37:03 +03:00
/*
* Sync a file
* As this calls fsync ( not fdatasync ) there is no need for a write_inode
* after it .
*/
static inline int nfsd_dosync ( struct file * filp , struct dentry * dp ,
const struct file_operations * fop )
{
struct inode * inode = dp - > d_inode ;
int ( * fsync ) ( struct file * , struct dentry * , int ) ;
int err ;
err = filemap_fdatawrite ( inode - > i_mapping ) ;
if ( err = = 0 & & fop & & ( fsync = fop - > fsync ) )
err = fsync ( filp , dp , 0 ) ;
if ( err = = 0 )
err = filemap_fdatawait ( inode - > i_mapping ) ;
return err ;
}
2006-01-06 11:19:58 +03:00
static int
2005-04-17 02:20:36 +04:00
nfsd_sync ( struct file * filp )
{
2009-01-06 21:37:03 +03:00
int err ;
struct inode * inode = filp - > f_path . dentry - > d_inode ;
dprintk ( " nfsd: sync file %s \n " , filp - > f_path . dentry - > d_name . name ) ;
mutex_lock ( & inode - > i_mutex ) ;
err = nfsd_dosync ( filp , filp - > f_path . dentry , filp - > f_op ) ;
mutex_unlock ( & inode - > i_mutex ) ;
return err ;
2005-04-17 02:20:36 +04:00
}
2006-01-19 04:43:13 +03:00
int
2009-01-06 21:37:03 +03:00
nfsd_sync_dir ( struct dentry * dp )
2005-04-17 02:20:36 +04:00
{
2009-01-06 21:37:03 +03:00
return nfsd_dosync ( NULL , dp , dp - > d_inode - > i_fop ) ;
2005-04-17 02:20:36 +04:00
}
/*
 * Obtain the readahead parameters for the file specified by (dev, ino).
 *
 * The (dev, ino) pair is hashed into raparm_hash[]; the bucket's list is
 * searched under the bucket's pb_lock.  If no entry matches, the last
 * entry seen with p_count == 0 is recycled for this file.  The entry that
 * is returned has been moved to the head of its bucket and has had
 * p_count incremented; the caller is expected to drop that count again
 * (nfsd_vfs_read() does so when it writes the parameters back).
 *
 * Returns NULL when there is neither a match nor an unused entry in the
 * bucket.
 */
static inline struct raparms *
nfsd_get_raparms ( dev_t dev , ino_t ino )
{
/* rap walks the link fields; frap remembers the link to a reusable entry */
struct raparms * ra , * * rap , * * frap = NULL ;
int depth = 0 ;
2006-10-04 13:15:49 +04:00
unsigned int hash ;
struct raparm_hbucket * rab ;
hash = jhash_2words ( dev , ino , 0xfeedbeef ) & RAPARM_HASH_MASK ;
rab = & raparm_hash [ hash ] ;
2005-04-17 02:20:36 +04:00
2006-10-04 13:15:49 +04:00
spin_lock ( & rab - > pb_lock ) ;
for ( rap = & rab - > pb_head ; ( ra = * rap ) ; rap = & ra - > p_next ) {
2005-04-17 02:20:36 +04:00
if ( ra - > p_ino = = ino & & ra - > p_dev = = dev )
goto found ;
/* depth = number of entries skipped before a hit */
depth + + ;
if ( ra - > p_count = = 0 )
frap = rap ;
}
/*
 * Not found: use a depth beyond the cache size so the miss is counted
 * in the top slot of the ra_depth histogram below.
 * NOTE(review): the resulting index is (ra_size * 11 / 10) * 10 / ra_size;
 * confirm it stays inside the bounds of nfsdstats.ra_depth[].
 */
depth = nfsdstats . ra_size * 11 / 10 ;
if ( ! frap ) {
2006-10-04 13:15:49 +04:00
spin_unlock ( & rab - > pb_lock ) ;
2005-04-17 02:20:36 +04:00
return NULL ;
}
/* recycle the unused entry for this (dev, ino) */
rap = frap ;
ra = * frap ;
ra - > p_dev = dev ;
ra - > p_ino = ino ;
ra - > p_set = 0 ;
2006-10-04 13:15:49 +04:00
ra - > p_hindex = hash ;
2005-04-17 02:20:36 +04:00
found :
2006-10-04 13:15:49 +04:00
/* move the entry to the front of the bucket unless it is already there */
if ( rap ! = & rab - > pb_head ) {
2005-04-17 02:20:36 +04:00
* rap = ra - > p_next ;
2006-10-04 13:15:49 +04:00
ra - > p_next = rab - > pb_head ;
rab - > pb_head = ra ;
2005-04-17 02:20:36 +04:00
}
ra - > p_count + + ;
nfsdstats . ra_depth [ depth * 10 / nfsdstats . ra_size ] + + ;
2006-10-04 13:15:49 +04:00
spin_unlock ( & rab - > pb_lock ) ;
2005-04-17 02:20:36 +04:00
return ra ;
}
/*
 * Grab and keep cached pages associated with a file in the svc_rqst
 * so that they can be passed to the network sendmsg/sendpage routines
 * directly.  They will be released after the sending has completed.
 *
 * Called once per pipe buffer.  sd->u.data carries the svc_rqst, sd->len
 * the number of bytes in this buffer.  Returns sd->len on success or the
 * non-zero result of the buffer's ->confirm() method.
 */
static int
2007-06-12 23:22:14 +04:00
nfsd_splice_actor ( struct pipe_inode_info * pipe , struct pipe_buffer * buf ,
struct splice_desc * sd )
2005-04-17 02:20:36 +04:00
{
2007-06-12 23:22:14 +04:00
struct svc_rqst * rqstp = sd - > u . data ;
2006-10-04 13:15:46 +04:00
/* pp: next free slot in the response page array */
struct page * * pp = rqstp - > rq_respages + rqstp - > rq_resused ;
2007-06-12 23:22:14 +04:00
struct page * page = buf - > page ;
size_t size ;
int ret ;
2005-04-17 02:20:36 +04:00
2007-06-14 15:10:48 +04:00
ret = buf - > ops - > confirm ( pipe , buf ) ;
2007-06-12 23:22:14 +04:00
if ( unlikely ( ret ) )
return ret ;
size = sd - > len ;
2005-04-17 02:20:36 +04:00
if ( rqstp - > rq_res . page_len = = 0 ) {
/*
 * First data page of the reply: take a reference on the spliced
 * page, drop the page currently in the slot, and record where the
 * data starts within the page.
 */
get_page ( page ) ;
2006-10-04 13:15:46 +04:00
put_page ( * pp ) ;
* pp = page ;
rqstp - > rq_resused + + ;
2007-06-12 23:22:14 +04:00
rqstp - > rq_res . page_base = buf - > offset ;
2005-04-17 02:20:36 +04:00
rqstp - > rq_res . page_len = size ;
2006-10-04 13:15:46 +04:00
} else if ( page ! = pp [ - 1 ] ) {
2005-04-17 02:20:36 +04:00
/*
 * A page different from the one stored in the previous slot:
 * install it in the next slot (which, unlike above, may be empty).
 */
get_page ( page ) ;
2007-01-26 11:56:59 +03:00
if ( * pp )
put_page ( * pp ) ;
2006-10-04 13:15:46 +04:00
* pp = page ;
rqstp - > rq_resused + + ;
2005-04-17 02:20:36 +04:00
rqstp - > rq_res . page_len + = size ;
2006-10-04 13:15:46 +04:00
} else
2005-04-17 02:20:36 +04:00
/* same page as the previous buffer: only the length grows */
rqstp - > rq_res . page_len + = size ;
return size ;
}
2007-06-12 23:22:14 +04:00
/*
 * Callback handed to splice_direct_to_actor() by nfsd_vfs_read(): drains
 * @pipe through __splice_from_pipe(), with nfsd_splice_actor() consuming
 * each buffer, and returns its result.
 */
static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
				    struct splice_desc *sd)
{
	int spliced = __splice_from_pipe(pipe, sd, nfsd_splice_actor);

	return spliced;
}
2007-10-17 10:31:15 +04:00
/*
 * Report whether the export behind @ffhp has the NFSEXP_MSNFS flag set.
 * Returns the masked flag value (non-zero when set); always 0 when the
 * file is built without MSNFS.
 */
static inline int svc_msnfs(struct svc_fh *ffhp)
{
	int msnfs = 0;

#ifdef MSNFS
	msnfs = ffhp->fh_export->ex_flags & NFSEXP_MSNFS;
#endif
	return msnfs;
}
2006-10-20 10:28:58 +04:00
/*
 * Read from an already opened file on behalf of an NFS request.
 *
 * rqstp:  request; used as the splice target when splicing is possible.
 * fhp:    file handle, consulted only for the MSNFS export flag.
 * file:   open file to read from.
 * offset: file position to start at.
 * vec/vlen: kernel iovec used on the non-splice path.
 * count:  in: bytes requested; out: bytes actually read (on success only).
 *
 * Returns 0 on success, nfserr_perm if an MSNFS export refuses the read
 * via lock_may_read(), or nfserrno() of the host error.
 */
static __be32
2005-04-17 02:20:36 +04:00
nfsd_vfs_read ( struct svc_rqst * rqstp , struct svc_fh * fhp , struct file * file ,
loff_t offset , struct kvec * vec , int vlen , unsigned long * count )
{
struct inode * inode ;
struct raparms * ra ;
mm_segment_t oldfs ;
2006-10-20 10:28:58 +04:00
__be32 err ;
int host_err ;
2005-04-17 02:20:36 +04:00
/* preset for the lock_may_read() refusal just below */
err = nfserr_perm ;
2006-12-08 13:36:41 +03:00
inode = file - > f_path . dentry - > d_inode ;
2007-10-17 10:31:15 +04:00
if ( svc_msnfs ( fhp ) & & ! lock_may_read ( inode , offset , * count ) )
2005-04-17 02:20:36 +04:00
goto out ;
/* Get readahead parameters; this takes a p_count reference on ra */
ra = nfsd_get_raparms ( inode - > i_sb - > s_dev , inode - > i_ino ) ;
/* seed the file's readahead state from the cache if it has been saved before */
if ( ra & & ra - > p_set )
file - > f_ra = ra - > p_ra ;
2007-06-12 23:22:14 +04:00
if ( file - > f_op - > splice_read & & rqstp - > rq_splice_ok ) {
/* splice path: page cache pages are attached to the reply by nfsd_splice_actor() */
struct splice_desc sd = {
. len = 0 ,
. total_len = * count ,
. pos = offset ,
. u . data = rqstp ,
} ;
2007-07-14 00:42:20 +04:00
rqstp - > rq_resused = 1 ;
2007-06-12 23:22:14 +04:00
host_err = splice_direct_to_actor ( file , & sd , nfsd_direct_splice_actor ) ;
2005-04-17 02:20:36 +04:00
} else {
/* copy path: vec points at kernel memory, hence the KERNEL_DS switch around vfs_readv() */
oldfs = get_fs ( ) ;
set_fs ( KERNEL_DS ) ;
2006-10-20 10:28:58 +04:00
host_err = vfs_readv ( file , ( struct iovec __user * ) vec , vlen , & offset ) ;
2005-04-17 02:20:36 +04:00
set_fs ( oldfs ) ;
}
/* Write back readahead params and drop the p_count taken above */
if ( ra ) {
2006-10-04 13:15:49 +04:00
struct raparm_hbucket * rab = & raparm_hash [ ra - > p_hindex ] ;
spin_lock ( & rab - > pb_lock ) ;
2005-04-17 02:20:36 +04:00
ra - > p_ra = file - > f_ra ;
ra - > p_set = 1 ;
ra - > p_count - - ;
2006-10-04 13:15:49 +04:00
spin_unlock ( & rab - > pb_lock ) ;
2005-04-17 02:20:36 +04:00
}
2006-10-20 10:28:58 +04:00
/* a non-negative host_err is the number of bytes read */
if ( host_err > = 0 ) {
nfsdstats . io_read + = host_err ;
* count = host_err ;
2005-04-17 02:20:36 +04:00
err = 0 ;
2006-12-08 13:36:41 +03:00
fsnotify_access ( file - > f_path . dentry ) ;
2005-04-17 02:20:36 +04:00
} else
2006-10-20 10:28:58 +04:00
err = nfserrno ( host_err ) ;
2005-04-17 02:20:36 +04:00
out :
return err ;
}
2006-01-06 11:19:59 +03:00
static void kill_suid ( struct dentry * dentry )
{
struct iattr ia ;
Implement file posix capabilities
Implement file posix capabilities. This allows programs to be given a
subset of root's powers regardless of who runs them, without having to use
setuid and giving the binary all of root's powers.
This version works with Kaigai Kohei's userspace tools, found at
http://www.kaigai.gr.jp/index.php. For more information on how to use this
patch, Chris Friedhoff has posted a nice page at
http://www.friedhoff.org/fscaps.html.
Changelog:
Nov 27:
Incorporate fixes from Andrew Morton
(security-introduce-file-caps-tweaks and
security-introduce-file-caps-warning-fix)
Fix Kconfig dependency.
Fix change signaling behavior when file caps are not compiled in.
Nov 13:
Integrate comments from Alexey: Remove CONFIG_ ifdef from
capability.h, and use %zd for printing a size_t.
Nov 13:
Fix endianness warnings by sparse as suggested by Alexey
Dobriyan.
Nov 09:
Address warnings of unused variables at cap_bprm_set_security
when file capabilities are disabled, and simultaneously clean
up the code a little, by pulling the new code into a helper
function.
Nov 08:
For pointers to required userspace tools and how to use
them, see http://www.friedhoff.org/fscaps.html.
Nov 07:
Fix the calculation of the highest bit checked in
check_cap_sanity().
Nov 07:
Allow file caps to be enabled without CONFIG_SECURITY, since
capabilities are the default.
Hook cap_task_setscheduler when !CONFIG_SECURITY.
Move capable(TASK_KILL) to end of cap_task_kill to reduce
audit messages.
Nov 05:
Add secondary calls in selinux/hooks.c to task_setioprio and
task_setscheduler so that selinux and capabilities with file
cap support can be stacked.
Sep 05:
As Seth Arnold points out, uid checks are out of place
for capability code.
Sep 01:
Define task_setscheduler, task_setioprio, cap_task_kill, and
task_setnice to make sure a user cannot affect a process in which
they called a program with some fscaps.
One remaining question is the note under task_setscheduler: are we
ok with CAP_SYS_NICE being sufficient to confine a process to a
cpuset?
It is a semantic change, as without fsccaps, attach_task doesn't
allow CAP_SYS_NICE to override the uid equivalence check. But since
it uses security_task_setscheduler, which elsewhere is used where
CAP_SYS_NICE can be used to override the uid equivalence check,
fixing it might be tough.
task_setscheduler
note: this also controls cpuset:attach_task. Are we ok with
CAP_SYS_NICE being used to confine to a cpuset?
task_setioprio
task_setnice
sys_setpriority uses this (through set_one_prio) for another
process. Need same checks as setrlimit
Aug 21:
Updated secureexec implementation to reflect the fact that
euid and uid might be the same and nonzero, but the process
might still have elevated caps.
Aug 15:
Handle endianness of xattrs.
Enforce capability version match between kernel and disk.
Enforce that no bits beyond the known max capability are
set, else return -EPERM.
With this extra processing, it may be worth reconsidering
doing all the work at bprm_set_security rather than
d_instantiate.
Aug 10:
Always call getxattr at bprm_set_security, rather than
caching it at d_instantiate.
[morgan@kernel.org: file-caps clean up for linux/capability.h]
[bunk@kernel.org: unexport cap_inode_killpriv]
Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Cc: James Morris <jmorris@namei.org>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: Andrew Morgan <morgan@kernel.org>
Signed-off-by: Andrew Morgan <morgan@kernel.org>
Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-10-17 10:31:36 +04:00
ia . ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV ;
2006-01-06 11:19:59 +03:00
2006-01-10 02:59:24 +03:00
mutex_lock ( & dentry - > d_inode - > i_mutex ) ;
2006-01-06 11:19:59 +03:00
notify_change ( dentry , & ia ) ;
2006-01-10 02:59:24 +03:00
mutex_unlock ( & dentry - > d_inode - > i_mutex ) ;
2006-01-06 11:19:59 +03:00
}
2006-10-20 10:28:58 +04:00
static __be32
2005-04-17 02:20:36 +04:00
nfsd_vfs_write ( struct svc_rqst * rqstp , struct svc_fh * fhp , struct file * file ,
loff_t offset , struct kvec * vec , int vlen ,
2009-03-06 04:16:14 +03:00
unsigned long * cnt , int * stablep )
2005-04-17 02:20:36 +04:00
{
struct svc_export * exp ;
struct dentry * dentry ;
struct inode * inode ;
mm_segment_t oldfs ;
2006-10-20 10:28:58 +04:00
__be32 err = 0 ;
int host_err ;
2005-04-17 02:20:36 +04:00
int stable = * stablep ;
2006-01-19 04:43:50 +03:00
# ifdef MSNFS
2005-04-17 02:20:36 +04:00
err = nfserr_perm ;
if ( ( fhp - > fh_export - > ex_flags & NFSEXP_MSNFS ) & &
2009-03-06 04:16:14 +03:00
( ! lock_may_write ( file - > f_path . dentry - > d_inode , offset , * cnt ) ) )
2005-04-17 02:20:36 +04:00
goto out ;
# endif
2006-12-08 13:36:41 +03:00
dentry = file - > f_path . dentry ;
2005-04-17 02:20:36 +04:00
inode = dentry - > d_inode ;
exp = fhp - > fh_export ;
/*
* Request sync writes if
* - the sync export option has been set , or
* - the client requested O_SYNC behavior ( NFSv3 feature ) .
* - The file system doesn ' t support fsync ( ) .
* When gathered writes have been configured for this volume ,
* flushing the data to disk is handled separately below .
*/
2008-02-20 23:49:02 +03:00
if ( ! file - > f_op - > fsync ) { /* COMMIT3 cannot work */
2005-04-17 02:20:36 +04:00
stable = 2 ;
* stablep = 2 ; /* FILE_SYNC */
}
if ( ! EX_ISSYNC ( exp ) )
stable = 0 ;
2009-02-07 01:25:24 +03:00
if ( stable & & ! EX_WGATHER ( exp ) ) {
spin_lock ( & file - > f_lock ) ;
2005-04-17 02:20:36 +04:00
file - > f_flags | = O_SYNC ;
2009-02-07 01:25:24 +03:00
spin_unlock ( & file - > f_lock ) ;
}
2005-04-17 02:20:36 +04:00
/* Write the data. */
oldfs = get_fs ( ) ; set_fs ( KERNEL_DS ) ;
2006-10-20 10:28:58 +04:00
host_err = vfs_writev ( file , ( struct iovec __user * ) vec , vlen , & offset ) ;
2005-04-17 02:20:36 +04:00
set_fs ( oldfs ) ;
2006-10-20 10:28:58 +04:00
if ( host_err > = 0 ) {
2009-03-06 04:16:14 +03:00
nfsdstats . io_write + = host_err ;
2006-12-08 13:36:41 +03:00
fsnotify_modify ( file - > f_path . dentry ) ;
2005-04-17 02:20:36 +04:00
}
/* clear setuid/setgid flag after write */
2006-10-20 10:28:58 +04:00
if ( host_err > = 0 & & ( inode - > i_mode & ( S_ISUID | S_ISGID ) ) )
2006-01-06 11:19:59 +03:00
kill_suid ( dentry ) ;
2005-04-17 02:20:36 +04:00
2006-10-20 10:28:58 +04:00
if ( host_err > = 0 & & stable ) {
2005-04-17 02:20:36 +04:00
static ino_t last_ino ;
static dev_t last_dev ;
/*
* Gathered writes : If another process is currently
* writing to the file , there ' s a high chance
* this is another nfsd ( triggered by a bulk write
* from a client ' s biod ) . Rather than syncing the
* file with each write request , we sleep for 10 msec .
*
* I don ' t know if this roughly approximates
* C . Juszak ' s idea of gathered writes , but it ' s a
* nice and simple solution ( IMHO ) , and it seems to
* work : - )
*/
if ( EX_WGATHER ( exp ) ) {
if ( atomic_read ( & inode - > i_writecount ) > 1
| | ( last_ino = = inode - > i_ino & & last_dev = = inode - > i_sb - > s_dev ) ) {
2007-10-19 10:40:40 +04:00
dprintk ( " nfsd: write defer %d \n " , task_pid_nr ( current ) ) ;
2005-04-17 02:20:36 +04:00
msleep ( 10 ) ;
2007-10-19 10:40:40 +04:00
dprintk ( " nfsd: write resume %d \n " , task_pid_nr ( current ) ) ;
2005-04-17 02:20:36 +04:00
}
if ( inode - > i_state & I_DIRTY ) {
2007-10-19 10:40:40 +04:00
dprintk ( " nfsd: write sync %d \n " , task_pid_nr ( current ) ) ;
2006-10-20 10:28:58 +04:00
host_err = nfsd_sync ( file ) ;
2005-04-17 02:20:36 +04:00
}
#if 0
wake_up ( & inode - > i_wait ) ;
# endif
}
last_ino = inode - > i_ino ;
last_dev = inode - > i_sb - > s_dev ;
}
2006-10-20 10:28:58 +04:00
dprintk ( " nfsd: write complete host_err=%d \n " , host_err ) ;
2009-03-06 04:16:14 +03:00
if ( host_err > = 0 ) {
2005-04-17 02:20:36 +04:00
err = 0 ;
2009-03-06 04:16:14 +03:00
* cnt = host_err ;
} else
2006-10-20 10:28:58 +04:00
err = nfserrno ( host_err ) ;
2005-04-17 02:20:36 +04:00
out :
return err ;
}
/*
* Read data from a file . count must contain the requested read count
* on entry . On return , * count contains the number of bytes actually read .
* N . B . After this call fhp needs an fh_put
*/
2006-10-20 10:28:58 +04:00
__be32
2005-04-17 02:20:36 +04:00
nfsd_read ( struct svc_rqst * rqstp , struct svc_fh * fhp , struct file * file ,
loff_t offset , struct kvec * vec , int vlen ,
unsigned long * count )
{
2006-10-20 10:28:58 +04:00
__be32 err ;
2005-04-17 02:20:36 +04:00
if ( file ) {
2007-07-17 15:04:48 +04:00
err = nfsd_permission ( rqstp , fhp - > fh_export , fhp - > fh_dentry ,
2008-06-16 15:20:29 +04:00
NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE ) ;
2005-04-17 02:20:36 +04:00
if ( err )
goto out ;
err = nfsd_vfs_read ( rqstp , fhp , file , offset , vec , vlen , count ) ;
} else {
2008-06-16 15:20:29 +04:00
err = nfsd_open ( rqstp , fhp , S_IFREG , NFSD_MAY_READ , & file ) ;
2005-04-17 02:20:36 +04:00
if ( err )
goto out ;
err = nfsd_vfs_read ( rqstp , fhp , file , offset , vec , vlen , count ) ;
nfsd_close ( file ) ;
}
out :
return err ;
}
/*
* Write data to a file .
* The stable flag requests synchronous writes .
* N . B . After this call fhp needs an fh_put
*/
2006-10-20 10:28:58 +04:00
__be32
2005-04-17 02:20:36 +04:00
nfsd_write ( struct svc_rqst * rqstp , struct svc_fh * fhp , struct file * file ,
2009-03-06 04:16:14 +03:00
loff_t offset , struct kvec * vec , int vlen , unsigned long * cnt ,
2005-04-17 02:20:36 +04:00
int * stablep )
{
2006-10-20 10:28:58 +04:00
__be32 err = 0 ;
2005-04-17 02:20:36 +04:00
if ( file ) {
2007-07-17 15:04:48 +04:00
err = nfsd_permission ( rqstp , fhp - > fh_export , fhp - > fh_dentry ,
2008-06-16 15:20:29 +04:00
NFSD_MAY_WRITE | NFSD_MAY_OWNER_OVERRIDE ) ;
2005-04-17 02:20:36 +04:00
if ( err )
goto out ;
err = nfsd_vfs_write ( rqstp , fhp , file , offset , vec , vlen , cnt ,
stablep ) ;
} else {
2008-06-16 15:20:29 +04:00
err = nfsd_open ( rqstp , fhp , S_IFREG , NFSD_MAY_WRITE , & file ) ;
2005-04-17 02:20:36 +04:00
if ( err )
goto out ;
if ( cnt )
err = nfsd_vfs_write ( rqstp , fhp , file , offset , vec , vlen ,
cnt , stablep ) ;
nfsd_close ( file ) ;
}
out :
return err ;
}
# ifdef CONFIG_NFSD_V3
/*
* Commit all pending writes to stable storage .
* Strictly speaking , we could sync just the indicated file region here ,
* but there ' s currently no way we can ask the VFS to do so .
*
* Unfortunately we cannot lock the file to make sure we return full WCC
* data to the client , as locking happens lower down in the filesystem .
*/
2006-10-20 10:28:58 +04:00
__be32
2005-04-17 02:20:36 +04:00
nfsd_commit ( struct svc_rqst * rqstp , struct svc_fh * fhp ,
loff_t offset , unsigned long count )
{
struct file * file ;
2006-10-20 10:28:58 +04:00
__be32 err ;
2005-04-17 02:20:36 +04:00
if ( ( u64 ) count > ~ ( u64 ) offset )
return nfserr_inval ;
2008-06-16 15:20:29 +04:00
err = nfsd_open ( rqstp , fhp , S_IFREG , NFSD_MAY_WRITE , & file ) ;
if ( err )
2005-04-17 02:20:36 +04:00
return err ;
if ( EX_ISSYNC ( fhp - > fh_export ) ) {
if ( file - > f_op & & file - > f_op - > fsync ) {
2006-01-19 04:43:50 +03:00
err = nfserrno ( nfsd_sync ( file ) ) ;
2005-04-17 02:20:36 +04:00
} else {
err = nfserr_notsupp ;
}
}
nfsd_close ( file ) ;
return err ;
}
# endif /* CONFIG_NFSD_V3 */
2008-02-14 00:30:26 +03:00
static __be32
2007-12-01 00:55:23 +03:00
nfsd_create_setattr ( struct svc_rqst * rqstp , struct svc_fh * resfhp ,
struct iattr * iap )
{
/*
* Mode has already been set earlier in create :
*/
iap - > ia_valid & = ~ ATTR_MODE ;
/*
* Setting uid / gid works only for root . Irix appears to
* send along the gid on create when it tries to implement
* setgid directories via NFS :
*/
2008-11-14 02:38:58 +03:00
if ( current_fsuid ( ) ! = 0 )
2007-12-01 00:55:23 +03:00
iap - > ia_valid & = ~ ( ATTR_UID | ATTR_GID ) ;
if ( iap - > ia_valid )
return nfsd_setattr ( rqstp , resfhp , iap , 0 , ( time_t ) 0 ) ;
return 0 ;
}
2009-02-10 06:27:51 +03:00
/* HPUX client sometimes creates a file in mode 000, and sets size to 0.
* setting size to 0 may fail for some specific file systems by the permission
* checking which requires WRITE permission but the mode is 000.
* we ignore the resizing ( to 0 ) on the just new created file , since the size is
* 0 after file created .
*
* call this only after vfs_create ( ) is called .
* */
static void
nfsd_check_ignore_resizing ( struct iattr * iap )
{
if ( ( iap - > ia_valid & ATTR_SIZE ) & & ( iap - > ia_size = = 0 ) )
iap - > ia_valid & = ~ ATTR_SIZE ;
}
2005-04-17 02:20:36 +04:00
/*
 * Create a file (regular, directory, device, fifo, socket) named
 * fname/flen in the directory behind fhp.
 *
 * iap:    attributes for the new object; ia_mode is rewritten here and
 *         ia_valid may be trimmed.
 * type:   S_IF* type of the object to create.
 * rdev:   device number, used only for the vfs_mknod() cases.
 * resfhp: response file handle.  If it has no dentry yet, the parent is
 *         locked here, the name is looked up and the handle composed.
 *         If it has already been verified, the parent directory should
 *         already be locked.
 *
 * Note that the parent directory is left locked: this function never
 * unlocks fhp.
 *
 * Returns 0 on success or an nfserr_* status.
 *
 * N.B. Every call to nfsd_create needs an fh_put for _both_ fhp and resfhp
 */
2006-10-20 10:28:58 +04:00
__be32
2005-04-17 02:20:36 +04:00
nfsd_create ( struct svc_rqst * rqstp , struct svc_fh * fhp ,
char * fname , int flen , struct iattr * iap ,
int type , dev_t rdev , struct svc_fh * resfhp )
{
struct dentry * dentry , * dchild = NULL ;
struct inode * dirp ;
2006-10-20 10:28:58 +04:00
__be32 err ;
2007-12-01 00:55:23 +03:00
__be32 err2 ;
2006-10-20 10:28:58 +04:00
int host_err ;
2005-04-17 02:20:36 +04:00
/* an empty name is refused with nfserr_perm */
err = nfserr_perm ;
if ( ! flen )
goto out ;
/* "." and ".." are reported as already existing */
err = nfserr_exist ;
if ( isdotent ( fname , flen ) )
goto out ;
2008-06-16 15:20:29 +04:00
err = fh_verify ( rqstp , fhp , S_IFDIR , NFSD_MAY_CREATE ) ;
2005-04-17 02:20:36 +04:00
if ( err )
goto out ;
dentry = fhp - > fh_dentry ;
dirp = dentry - > d_inode ;
/* a parent without a ->lookup method is not treated as a directory */
err = nfserr_notdir ;
2008-12-04 18:06:33 +03:00
if ( ! dirp - > i_op - > lookup )
2005-04-17 02:20:36 +04:00
goto out ;
/*
 * Check whether the response file handle has been verified yet.
 * If it has, the parent directory should already be locked.
 */
if ( ! resfhp - > fh_dentry ) {
/* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */
2006-10-02 13:18:03 +04:00
fh_lock_nested ( fhp , I_MUTEX_PARENT ) ;
2005-04-17 02:20:36 +04:00
dchild = lookup_one_len ( fname , dentry , flen ) ;
2006-10-20 10:28:58 +04:00
host_err = PTR_ERR ( dchild ) ;
2005-04-17 02:20:36 +04:00
if ( IS_ERR ( dchild ) )
goto out_nfserr ;
err = fh_compose ( resfhp , fhp - > fh_export , dchild , fhp ) ;
if ( err )
goto out ;
} else {
/* called from nfsd_proc_create */
/* take our own reference so the dput() at "out" is balanced */
dchild = dget ( resfhp - > fh_dentry ) ;
if ( ! fhp - > fh_locked ) {
/* not actually possible */
printk ( KERN_ERR
" nfsd_create: parent %s/%s not locked! \n " ,
dentry - > d_parent - > d_name . name ,
dentry - > d_name . name ) ;
2006-01-19 04:43:44 +03:00
err = nfserr_io ;
2005-04-17 02:20:36 +04:00
goto out ;
}
}
/*
 * Make sure the child dentry is still negative ...
 */
err = nfserr_exist ;
if ( dchild - > d_inode ) {
dprintk ( " nfsd_create: dentry %s/%s not negative! \n " ,
dentry - > d_name . name , dchild - > d_name . name ) ;
goto out ;
}
/* no mode supplied: start from 0, then keep only permission bits and add the type */
if ( ! ( iap - > ia_valid & ATTR_MODE ) )
iap - > ia_mode = 0 ;
iap - > ia_mode = ( iap - > ia_mode & S_IALLUGO ) | type ;
2008-07-01 17:38:35 +04:00
/* reject types the switch below has no case for */
err = nfserr_inval ;
if ( ! S_ISREG ( type ) & & ! S_ISDIR ( type ) & & ! special_file ( type ) ) {
printk ( KERN_WARNING " nfsd: bad file type %o in nfsd_create \n " ,
type ) ;
goto out ;
}
/* from here on every exit path must pair this with mnt_drop_write() */
host_err = mnt_want_write ( fhp - > fh_export - > ex_path . mnt ) ;
if ( host_err )
goto out_nfserr ;
2005-04-17 02:20:36 +04:00
/*
 * Create the object with the VFS call that matches its type.
 */
2006-11-09 04:44:59 +03:00
err = 0 ;
2005-04-17 02:20:36 +04:00
switch ( type ) {
case S_IFREG :
2006-10-20 10:28:58 +04:00
host_err = vfs_create ( dirp , dchild , iap - > ia_mode , NULL ) ;
2009-02-10 06:27:51 +03:00
/* a new file is already empty: drop a request to set size 0 */
if ( ! host_err )
nfsd_check_ignore_resizing ( iap ) ;
2005-04-17 02:20:36 +04:00
break ;
case S_IFDIR :
2006-10-20 10:28:58 +04:00
host_err = vfs_mkdir ( dirp , dchild , iap - > ia_mode ) ;
2005-04-17 02:20:36 +04:00
break ;
case S_IFCHR :
case S_IFBLK :
case S_IFIFO :
case S_IFSOCK :
2006-10-20 10:28:58 +04:00
host_err = vfs_mknod ( dirp , dchild , iap - > ia_mode , rdev ) ;
2005-04-17 02:20:36 +04:00
break ;
}
2008-02-16 01:37:57 +03:00
if ( host_err < 0 ) {
mnt_drop_write ( fhp - > fh_export - > ex_path . mnt ) ;
2005-04-17 02:20:36 +04:00
goto out_nfserr ;
2008-02-16 01:37:57 +03:00
}
2005-04-17 02:20:36 +04:00
/* sync exports: sync the parent directory and call write_inode_now() on the new inode */
if ( EX_ISSYNC ( fhp - > fh_export ) ) {
2006-01-19 04:43:50 +03:00
err = nfserrno ( nfsd_sync_dir ( dentry ) ) ;
2005-04-17 02:20:36 +04:00
write_inode_now ( dchild - > d_inode , 1 ) ;
}
2007-12-01 00:55:23 +03:00
/* a setattr failure takes precedence over a directory sync failure */
err2 = nfsd_create_setattr ( rqstp , resfhp , iap ) ;
if ( err2 )
err = err2 ;
2008-02-16 01:37:57 +03:00
mnt_drop_write ( fhp - > fh_export - > ex_path . mnt ) ;
2005-04-17 02:20:36 +04:00
/*
 * Update the file handle to get the new inode info.
 */
if ( ! err )
err = fh_update ( resfhp ) ;
out :
/* dchild is NULL on the earliest exits and an ERR_PTR after a failed lookup */
if ( dchild & & ! IS_ERR ( dchild ) )
dput ( dchild ) ;
return err ;
out_nfserr :
2006-10-20 10:28:58 +04:00
err = nfserrno ( host_err ) ;
2005-04-17 02:20:36 +04:00
goto out ;
}
# ifdef CONFIG_NFSD_V3
/*
 * NFSv3 version of nfsd_create: create a regular file named fname/flen
 * in the directory behind fhp, honouring the NFSv3 create mode.
 *
 * iap:        attributes for the new file; modified in place.
 * resfhp:     response file handle, composed here from the looked-up child.
 * createmode: NFS3_CREATE_UNCHECKED, NFS3_CREATE_GUARDED or
 *             NFS3_CREATE_EXCLUSIVE.
 * verifier:   two 32-bit words, read only for NFS3_CREATE_EXCLUSIVE; they
 *             are stored in the new file's mtime/atime and compared against
 *             them when the file already exists.
 * truncp:     if non-NULL and an UNCHECKED create hits an existing regular
 *             file, the file is not touched; *truncp is set to whether the
 *             caller asked for size 0.
 * created:    if non-NULL, set to 1 when vfs_create() succeeded.
 *
 * The parent is locked with fh_lock_nested() and fh_unlock() is called on
 * every exit path, including the ones taken before the lock is acquired
 * (presumably a no-op when fhp is not locked -- TODO confirm).
 *
 * Returns 0 on success or an nfserr_* status.
 */
2006-10-20 10:28:58 +04:00
__be32
2005-04-17 02:20:36 +04:00
nfsd_create_v3 ( struct svc_rqst * rqstp , struct svc_fh * fhp ,
char * fname , int flen , struct iattr * iap ,
struct svc_fh * resfhp , int createmode , u32 * verifier ,
2006-11-09 04:44:40 +03:00
int * truncp , int * created )
2005-04-17 02:20:36 +04:00
{
struct dentry * dentry , * dchild = NULL ;
struct inode * dirp ;
2006-10-20 10:28:58 +04:00
__be32 err ;
2007-12-01 00:55:23 +03:00
__be32 err2 ;
2006-10-20 10:28:58 +04:00
int host_err ;
2005-04-17 02:20:36 +04:00
__u32 v_mtime = 0 , v_atime = 0 ;
/* an empty name is refused with nfserr_perm */
err = nfserr_perm ;
if ( ! flen )
goto out ;
/* "." and ".." are reported as already existing */
err = nfserr_exist ;
if ( isdotent ( fname , flen ) )
goto out ;
if ( ! ( iap - > ia_valid & ATTR_MODE ) )
iap - > ia_mode = 0 ;
2008-06-16 15:20:29 +04:00
err = fh_verify ( rqstp , fhp , S_IFDIR , NFSD_MAY_CREATE ) ;
2005-04-17 02:20:36 +04:00
if ( err )
goto out ;
dentry = fhp - > fh_dentry ;
dirp = dentry - > d_inode ;
/* Get all the sanity checks out of the way before
 * we lock the parent. */
err = nfserr_notdir ;
2008-12-04 18:06:33 +03:00
if ( ! dirp - > i_op - > lookup )
2005-04-17 02:20:36 +04:00
goto out ;
2006-10-02 13:18:03 +04:00
fh_lock_nested ( fhp , I_MUTEX_PARENT ) ;
2005-04-17 02:20:36 +04:00
/*
 * Compose the response file handle.
 */
dchild = lookup_one_len ( fname , dentry , flen ) ;
2006-10-20 10:28:58 +04:00
host_err = PTR_ERR ( dchild ) ;
2005-04-17 02:20:36 +04:00
if ( IS_ERR ( dchild ) )
goto out_nfserr ;
err = fh_compose ( resfhp , fhp - > fh_export , dchild , fhp ) ;
if ( err )
goto out ;
if ( createmode = = NFS3_CREATE_EXCLUSIVE ) {
/* solaris7 gets confused (bugid 4218508) if these have
 * the high bit set, so just clear the high bits. If this is
 * ever changed to use different attrs for storing the
 * verifier, then do_open_lookup() will also need to be fixed
 * accordingly.
 */
v_mtime = verifier [ 0 ] & 0x7fffffff ;
v_atime = verifier [ 1 ] & 0x7fffffff ;
}
2008-02-16 01:37:57 +03:00
/* from here on every exit path must pair this with mnt_drop_write() */
host_err = mnt_want_write ( fhp - > fh_export - > ex_path . mnt ) ;
if ( host_err )
goto out_nfserr ;
2005-04-17 02:20:36 +04:00
/* the name already exists: what happens depends on the create mode */
if ( dchild - > d_inode ) {
err = 0 ;
switch ( createmode ) {
case NFS3_CREATE_UNCHECKED :
if ( ! S_ISREG ( dchild - > d_inode - > i_mode ) )
err = nfserr_exist ;
else if ( truncp ) {
/* in nfsv4, we need to treat this case a little
 * differently. we don't want to truncate the
 * file now; this would be wrong if the OPEN
 * fails for some other reason. furthermore,
 * if the size is nonzero, we should ignore it
 * according to spec!
 */
* truncp = ( iap - > ia_valid & ATTR_SIZE ) & & ! iap - > ia_size ;
}
else {
/* only a size change is applied to the existing file */
iap - > ia_valid & = ATTR_SIZE ;
goto set_attr ;
}
break ;
case NFS3_CREATE_EXCLUSIVE :
/* same verifier and still empty: treat as a replay of our own create */
if ( dchild - > d_inode - > i_mtime . tv_sec = = v_mtime
& & dchild - > d_inode - > i_atime . tv_sec = = v_atime
& & dchild - > d_inode - > i_size = = 0 )
break ;
/* fallthru */
case NFS3_CREATE_GUARDED :
err = nfserr_exist ;
}
2008-02-16 01:37:57 +03:00
mnt_drop_write ( fhp - > fh_export - > ex_path . mnt ) ;
2005-04-17 02:20:36 +04:00
goto out ;
}
2006-10-20 10:28:58 +04:00
host_err = vfs_create ( dirp , dchild , iap - > ia_mode , NULL ) ;
2008-02-16 01:37:57 +03:00
if ( host_err < 0 ) {
mnt_drop_write ( fhp - > fh_export - > ex_path . mnt ) ;
2005-04-17 02:20:36 +04:00
goto out_nfserr ;
2008-02-16 01:37:57 +03:00
}
2006-11-09 04:44:40 +03:00
if ( created )
* created = 1 ;
2005-04-17 02:20:36 +04:00
if ( EX_ISSYNC ( fhp - > fh_export ) ) {
2006-01-19 04:43:50 +03:00
err = nfserrno ( nfsd_sync_dir ( dentry ) ) ;
2005-04-17 02:20:36 +04:00
/* setattr will sync the child (or not) */
}
2009-02-10 06:27:51 +03:00
/* a new file is already empty: drop a request to set size 0 */
nfsd_check_ignore_resizing ( iap ) ;
2005-04-17 02:20:36 +04:00
if ( createmode = = NFS3_CREATE_EXCLUSIVE ) {
2007-01-26 11:57:00 +03:00
/* Cram the verifier into atime/mtime; all other requested attributes are discarded */
2005-04-17 02:20:36 +04:00
iap - > ia_valid = ATTR_MTIME | ATTR_ATIME
2007-01-26 11:57:00 +03:00
| ATTR_MTIME_SET | ATTR_ATIME_SET ;
2005-04-17 02:20:36 +04:00
/* XXX someone who knows this better please fix it for nsec */
iap - > ia_mtime . tv_sec = v_mtime ;
iap - > ia_atime . tv_sec = v_atime ;
iap - > ia_mtime . tv_nsec = 0 ;
iap - > ia_atime . tv_nsec = 0 ;
}
/* also reached by goto from the UNCHECKED case above, with err == 0 */
set_attr :
2007-12-01 00:55:23 +03:00
/* a setattr failure takes precedence over a directory sync failure */
err2 = nfsd_create_setattr ( rqstp , resfhp , iap ) ;
if ( err2 )
err = err2 ;
2006-01-19 04:43:13 +03:00
2008-02-16 01:37:57 +03:00
mnt_drop_write ( fhp - > fh_export - > ex_path . mnt ) ;
2006-01-19 04:43:13 +03:00
/*
 * Update the filehandle to get the new inode info.
 */
if ( ! err )
err = fh_update ( resfhp ) ;
2005-04-17 02:20:36 +04:00
out :
fh_unlock ( fhp ) ;
/* dchild is NULL on the earliest exits and an ERR_PTR after a failed lookup */
if ( dchild & & ! IS_ERR ( dchild ) )
dput ( dchild ) ;
return err ;
out_nfserr :
2006-10-20 10:28:58 +04:00
err = nfserrno ( host_err ) ;
2005-04-17 02:20:36 +04:00
goto out ;
}
# endif /* CONFIG_NFSD_V3 */
/*
* Read a symlink . On entry , * lenp must contain the maximum path length that
* fits into the buffer . On return , it contains the true length .
* N . B . After this call fhp needs an fh_put
*/
2006-10-20 10:28:58 +04:00
__be32
2005-04-17 02:20:36 +04:00
nfsd_readlink ( struct svc_rqst * rqstp , struct svc_fh * fhp , char * buf , int * lenp )
{
struct dentry * dentry ;
struct inode * inode ;
mm_segment_t oldfs ;
2006-10-20 10:28:58 +04:00
__be32 err ;
int host_err ;
2005-04-17 02:20:36 +04:00
2008-06-16 15:20:29 +04:00
err = fh_verify ( rqstp , fhp , S_IFLNK , NFSD_MAY_NOP ) ;
2005-04-17 02:20:36 +04:00
if ( err )
goto out ;
dentry = fhp - > fh_dentry ;
inode = dentry - > d_inode ;
err = nfserr_inval ;
2008-12-04 18:06:33 +03:00
if ( ! inode - > i_op - > readlink )
2005-04-17 02:20:36 +04:00
goto out ;
2008-02-15 06:38:39 +03:00
touch_atime ( fhp - > fh_export - > ex_path . mnt , dentry ) ;
2005-04-17 02:20:36 +04:00
/* N.B. Why does this call need a get_fs()??
* Remove the set_fs and watch the fireworks : - ) - - okir
*/
oldfs = get_fs ( ) ; set_fs ( KERNEL_DS ) ;
2006-10-20 10:28:58 +04:00
host_err = inode - > i_op - > readlink ( dentry , buf , * lenp ) ;
2005-04-17 02:20:36 +04:00
set_fs ( oldfs ) ;
2006-10-20 10:28:58 +04:00
if ( host_err < 0 )
2005-04-17 02:20:36 +04:00
goto out_nfserr ;
2006-10-20 10:28:58 +04:00
* lenp = host_err ;
2005-04-17 02:20:36 +04:00
err = 0 ;
out :
return err ;
out_nfserr :
2006-10-20 10:28:58 +04:00
err = nfserrno ( host_err ) ;
2005-04-17 02:20:36 +04:00
goto out ;
}
/*
* Create a symlink and look up its inode
* N . B . After this call _both_ fhp and resfhp need an fh_put
*/
2006-10-20 10:28:58 +04:00
__be32
2005-04-17 02:20:36 +04:00
nfsd_symlink ( struct svc_rqst * rqstp , struct svc_fh * fhp ,
char * fname , int flen ,
char * path , int plen ,
struct svc_fh * resfhp ,
struct iattr * iap )
{
struct dentry * dentry , * dnew ;
2006-10-20 10:28:58 +04:00
__be32 err , cerr ;
int host_err ;
2005-04-17 02:20:36 +04:00
err = nfserr_noent ;
if ( ! flen | | ! plen )
goto out ;
err = nfserr_exist ;
if ( isdotent ( fname , flen ) )
goto out ;
2008-06-16 15:20:29 +04:00
err = fh_verify ( rqstp , fhp , S_IFDIR , NFSD_MAY_CREATE ) ;
2005-04-17 02:20:36 +04:00
if ( err )
goto out ;
fh_lock ( fhp ) ;
dentry = fhp - > fh_dentry ;
dnew = lookup_one_len ( fname , dentry , flen ) ;
2006-10-20 10:28:58 +04:00
host_err = PTR_ERR ( dnew ) ;
2005-04-17 02:20:36 +04:00
if ( IS_ERR ( dnew ) )
goto out_nfserr ;
2008-02-16 01:37:45 +03:00
host_err = mnt_want_write ( fhp - > fh_export - > ex_path . mnt ) ;
if ( host_err )
goto out_nfserr ;
2005-04-17 02:20:36 +04:00
if ( unlikely ( path [ plen ] ! = 0 ) ) {
char * path_alloced = kmalloc ( plen + 1 , GFP_KERNEL ) ;
if ( path_alloced = = NULL )
2006-10-20 10:28:58 +04:00
host_err = - ENOMEM ;
2005-04-17 02:20:36 +04:00
else {
strncpy ( path_alloced , path , plen ) ;
path_alloced [ plen ] = 0 ;
2008-06-24 18:50:16 +04:00
host_err = vfs_symlink ( dentry - > d_inode , dnew , path_alloced ) ;
2005-04-17 02:20:36 +04:00
kfree ( path_alloced ) ;
}
} else
2008-06-24 18:50:16 +04:00
host_err = vfs_symlink ( dentry - > d_inode , dnew , path ) ;
2005-04-17 02:20:36 +04:00
2006-10-20 10:28:58 +04:00
if ( ! host_err ) {
2005-04-17 02:20:36 +04:00
if ( EX_ISSYNC ( fhp - > fh_export ) )
2006-10-20 10:28:58 +04:00
host_err = nfsd_sync_dir ( dentry ) ;
}
err = nfserrno ( host_err ) ;
2005-04-17 02:20:36 +04:00
fh_unlock ( fhp ) ;
2008-02-16 01:37:45 +03:00
mnt_drop_write ( fhp - > fh_export - > ex_path . mnt ) ;
2005-04-17 02:20:36 +04:00
cerr = fh_compose ( resfhp , fhp - > fh_export , dnew , fhp ) ;
dput ( dnew ) ;
if ( err = = 0 ) err = cerr ;
out :
return err ;
out_nfserr :
2006-10-20 10:28:58 +04:00
err = nfserrno ( host_err ) ;
2005-04-17 02:20:36 +04:00
goto out ;
}
/*
* Create a hardlink
* N . B . After this call _both_ ffhp and tfhp need an fh_put
*/
2006-10-20 10:28:58 +04:00
__be32
2005-04-17 02:20:36 +04:00
nfsd_link ( struct svc_rqst * rqstp , struct svc_fh * ffhp ,
char * name , int len , struct svc_fh * tfhp )
{
struct dentry * ddir , * dnew , * dold ;
struct inode * dirp , * dest ;
2006-10-20 10:28:58 +04:00
__be32 err ;
int host_err ;
2005-04-17 02:20:36 +04:00
2008-06-16 15:20:29 +04:00
err = fh_verify ( rqstp , ffhp , S_IFDIR , NFSD_MAY_CREATE ) ;
2005-04-17 02:20:36 +04:00
if ( err )
goto out ;
2008-06-16 15:20:29 +04:00
err = fh_verify ( rqstp , tfhp , - S_IFDIR , NFSD_MAY_NOP ) ;
2005-04-17 02:20:36 +04:00
if ( err )
goto out ;
err = nfserr_perm ;
if ( ! len )
goto out ;
err = nfserr_exist ;
if ( isdotent ( name , len ) )
goto out ;
2006-10-02 13:18:03 +04:00
fh_lock_nested ( ffhp , I_MUTEX_PARENT ) ;
2005-04-17 02:20:36 +04:00
ddir = ffhp - > fh_dentry ;
dirp = ddir - > d_inode ;
dnew = lookup_one_len ( name , ddir , len ) ;
2006-10-20 10:28:58 +04:00
host_err = PTR_ERR ( dnew ) ;
2005-04-17 02:20:36 +04:00
if ( IS_ERR ( dnew ) )
goto out_nfserr ;
dold = tfhp - > fh_dentry ;
dest = dold - > d_inode ;
2008-02-16 01:37:45 +03:00
host_err = mnt_want_write ( tfhp - > fh_export - > ex_path . mnt ) ;
if ( host_err ) {
err = nfserrno ( host_err ) ;
goto out_dput ;
}
2006-10-20 10:28:58 +04:00
host_err = vfs_link ( dold , dirp , dnew ) ;
if ( ! host_err ) {
2005-04-17 02:20:36 +04:00
if ( EX_ISSYNC ( ffhp - > fh_export ) ) {
2006-01-19 04:43:50 +03:00
err = nfserrno ( nfsd_sync_dir ( ddir ) ) ;
2005-04-17 02:20:36 +04:00
write_inode_now ( dest , 1 ) ;
}
2006-10-20 10:28:58 +04:00
err = 0 ;
2005-04-17 02:20:36 +04:00
} else {
2006-10-20 10:28:58 +04:00
if ( host_err = = - EXDEV & & rqstp - > rq_vers = = 2 )
2005-04-17 02:20:36 +04:00
err = nfserr_acces ;
else
2006-10-20 10:28:58 +04:00
err = nfserrno ( host_err ) ;
2005-04-17 02:20:36 +04:00
}
2008-02-16 01:37:45 +03:00
mnt_drop_write ( tfhp - > fh_export - > ex_path . mnt ) ;
out_dput :
2005-04-17 02:20:36 +04:00
dput ( dnew ) ;
2006-06-30 12:56:15 +04:00
out_unlock :
fh_unlock ( ffhp ) ;
2005-04-17 02:20:36 +04:00
out :
return err ;
out_nfserr :
2006-10-20 10:28:58 +04:00
err = nfserrno ( host_err ) ;
2006-06-30 12:56:15 +04:00
goto out_unlock ;
2005-04-17 02:20:36 +04:00
}
/*
* Rename a file
* N . B . After this call _both_ ffhp and tfhp need an fh_put
*/
2006-10-20 10:28:58 +04:00
__be32
2005-04-17 02:20:36 +04:00
nfsd_rename ( struct svc_rqst * rqstp , struct svc_fh * ffhp , char * fname , int flen ,
struct svc_fh * tfhp , char * tname , int tlen )
{
struct dentry * fdentry , * tdentry , * odentry , * ndentry , * trap ;
struct inode * fdir , * tdir ;
2006-10-20 10:28:58 +04:00
__be32 err ;
int host_err ;
2005-04-17 02:20:36 +04:00
2008-06-16 15:20:29 +04:00
err = fh_verify ( rqstp , ffhp , S_IFDIR , NFSD_MAY_REMOVE ) ;
2005-04-17 02:20:36 +04:00
if ( err )
goto out ;
2008-06-16 15:20:29 +04:00
err = fh_verify ( rqstp , tfhp , S_IFDIR , NFSD_MAY_CREATE ) ;
2005-04-17 02:20:36 +04:00
if ( err )
goto out ;
fdentry = ffhp - > fh_dentry ;
fdir = fdentry - > d_inode ;
tdentry = tfhp - > fh_dentry ;
tdir = tdentry - > d_inode ;
err = ( rqstp - > rq_vers = = 2 ) ? nfserr_acces : nfserr_xdev ;
2006-06-30 12:56:10 +04:00
if ( ffhp - > fh_export ! = tfhp - > fh_export )
2005-04-17 02:20:36 +04:00
goto out ;
err = nfserr_perm ;
if ( ! flen | | isdotent ( fname , flen ) | | ! tlen | | isdotent ( tname , tlen ) )
goto out ;
/* cannot use fh_lock as we need deadlock protective ordering
* so do it by hand */
trap = lock_rename ( tdentry , fdentry ) ;
ffhp - > fh_locked = tfhp - > fh_locked = 1 ;
fill_pre_wcc ( ffhp ) ;
fill_pre_wcc ( tfhp ) ;
odentry = lookup_one_len ( fname , fdentry , flen ) ;
2006-10-20 10:28:58 +04:00
host_err = PTR_ERR ( odentry ) ;
2005-04-17 02:20:36 +04:00
if ( IS_ERR ( odentry ) )
goto out_nfserr ;
2006-10-20 10:28:58 +04:00
host_err = - ENOENT ;
2005-04-17 02:20:36 +04:00
if ( ! odentry - > d_inode )
goto out_dput_old ;
2006-10-20 10:28:58 +04:00
host_err = - EINVAL ;
2005-04-17 02:20:36 +04:00
if ( odentry = = trap )
goto out_dput_old ;
ndentry = lookup_one_len ( tname , tdentry , tlen ) ;
2006-10-20 10:28:58 +04:00
host_err = PTR_ERR ( ndentry ) ;
2005-04-17 02:20:36 +04:00
if ( IS_ERR ( ndentry ) )
goto out_dput_old ;
2006-10-20 10:28:58 +04:00
host_err = - ENOTEMPTY ;
2005-04-17 02:20:36 +04:00
if ( ndentry = = trap )
goto out_dput_new ;
2008-02-16 01:37:49 +03:00
if ( svc_msnfs ( ffhp ) & &
2005-04-17 02:20:36 +04:00
( ( atomic_read ( & odentry - > d_count ) > 1 )
| | ( atomic_read ( & ndentry - > d_count ) > 1 ) ) ) {
2006-10-20 10:28:58 +04:00
host_err = - EPERM ;
2008-02-16 01:37:49 +03:00
goto out_dput_new ;
}
host_err = - EXDEV ;
if ( ffhp - > fh_export - > ex_path . mnt ! = tfhp - > fh_export - > ex_path . mnt )
goto out_dput_new ;
host_err = mnt_want_write ( ffhp - > fh_export - > ex_path . mnt ) ;
if ( host_err )
goto out_dput_new ;
2006-10-20 10:28:58 +04:00
host_err = vfs_rename ( fdir , odentry , tdir , ndentry ) ;
if ( ! host_err & & EX_ISSYNC ( tfhp - > fh_export ) ) {
host_err = nfsd_sync_dir ( tdentry ) ;
if ( ! host_err )
host_err = nfsd_sync_dir ( fdentry ) ;
2005-04-17 02:20:36 +04:00
}
2008-02-16 01:37:49 +03:00
mnt_drop_write ( ffhp - > fh_export - > ex_path . mnt ) ;
2005-04-17 02:20:36 +04:00
out_dput_new :
dput ( ndentry ) ;
out_dput_old :
dput ( odentry ) ;
out_nfserr :
2006-10-20 10:28:58 +04:00
err = nfserrno ( host_err ) ;
2005-04-17 02:20:36 +04:00
/* we cannot reply on fh_unlock on the two filehandles,
* as that would do the wrong thing if the two directories
* were the same , so again we do it by hand
*/
fill_post_wcc ( ffhp ) ;
fill_post_wcc ( tfhp ) ;
unlock_rename ( tdentry , fdentry ) ;
ffhp - > fh_locked = tfhp - > fh_locked = 0 ;
out :
return err ;
}
/*
* Unlink a file or directory
* N . B . After this call fhp needs an fh_put
*/
2006-10-20 10:28:58 +04:00
__be32
2005-04-17 02:20:36 +04:00
nfsd_unlink ( struct svc_rqst * rqstp , struct svc_fh * fhp , int type ,
char * fname , int flen )
{
struct dentry * dentry , * rdentry ;
struct inode * dirp ;
2006-10-20 10:28:58 +04:00
__be32 err ;
int host_err ;
2005-04-17 02:20:36 +04:00
err = nfserr_acces ;
if ( ! flen | | isdotent ( fname , flen ) )
goto out ;
2008-06-16 15:20:29 +04:00
err = fh_verify ( rqstp , fhp , S_IFDIR , NFSD_MAY_REMOVE ) ;
2005-04-17 02:20:36 +04:00
if ( err )
goto out ;
2006-10-02 13:18:03 +04:00
fh_lock_nested ( fhp , I_MUTEX_PARENT ) ;
2005-04-17 02:20:36 +04:00
dentry = fhp - > fh_dentry ;
dirp = dentry - > d_inode ;
rdentry = lookup_one_len ( fname , dentry , flen ) ;
2006-10-20 10:28:58 +04:00
host_err = PTR_ERR ( rdentry ) ;
2005-04-17 02:20:36 +04:00
if ( IS_ERR ( rdentry ) )
goto out_nfserr ;
if ( ! rdentry - > d_inode ) {
dput ( rdentry ) ;
err = nfserr_noent ;
goto out ;
}
if ( ! type )
type = rdentry - > d_inode - > i_mode & S_IFMT ;
2008-02-16 01:37:34 +03:00
host_err = mnt_want_write ( fhp - > fh_export - > ex_path . mnt ) ;
if ( host_err )
goto out_nfserr ;
2005-04-17 02:20:36 +04:00
if ( type ! = S_IFDIR ) { /* It's UNLINK */
# ifdef MSNFS
if ( ( fhp - > fh_export - > ex_flags & NFSEXP_MSNFS ) & &
( atomic_read ( & rdentry - > d_count ) > 1 ) ) {
2006-10-20 10:28:58 +04:00
host_err = - EPERM ;
2005-04-17 02:20:36 +04:00
} else
# endif
2006-10-20 10:28:58 +04:00
host_err = vfs_unlink ( dirp , rdentry ) ;
2005-04-17 02:20:36 +04:00
} else { /* It's RMDIR */
2006-10-20 10:28:58 +04:00
host_err = vfs_rmdir ( dirp , rdentry ) ;
2005-04-17 02:20:36 +04:00
}
dput ( rdentry ) ;
2006-10-20 10:28:58 +04:00
if ( host_err )
2008-02-16 01:37:34 +03:00
goto out_drop ;
2006-10-20 10:28:58 +04:00
if ( EX_ISSYNC ( fhp - > fh_export ) )
host_err = nfsd_sync_dir ( dentry ) ;
2005-04-17 02:20:36 +04:00
2008-02-16 01:37:34 +03:00
out_drop :
mnt_drop_write ( fhp - > fh_export - > ex_path . mnt ) ;
2005-04-17 02:20:36 +04:00
out_nfserr :
2006-10-20 10:28:58 +04:00
err = nfserrno ( host_err ) ;
2006-01-19 04:43:13 +03:00
out :
return err ;
2005-04-17 02:20:36 +04:00
}
2008-07-31 23:29:12 +04:00
/*
* We do this buffering because we must not call back into the file
* system ' s - > lookup ( ) method from the filldir callback . That may well
* deadlock a number of file systems .
*
* This is based heavily on the implementation of same in XFS .
*/
/*
 * One directory entry as stored in the readdir staging page.  Records are
 * written by nfsd_buffered_filldir() and replayed by nfsd_buffered_readdir();
 * each record occupies sizeof(struct buffered_dirent) + namlen bytes rounded
 * up to a multiple of sizeof(u64).
 */
struct buffered_dirent {
u64 ino ; /* inode number handed to the filldir callback */
loff_t offset ; /* directory offset handed to the filldir callback */
int namlen ; /* number of bytes stored in name[] */
unsigned int d_type ; /* entry type handed to the filldir callback */
char name [ ] ; /* namlen bytes copied with memcpy(); no terminator is added */
} ;
/*
 * Staging state shared between nfsd_buffered_readdir() and its
 * nfsd_buffered_filldir() callback.
 */
struct readdir_data {
char * dirent ; /* staging buffer; nfsd_buffered_readdir() allocates one page for it */
size_t used ; /* bytes of the buffer already filled with records */
2008-08-24 15:29:52 +04:00
int full ; /* set to 1 by the callback when the next record would not fit */
2008-07-31 23:29:12 +04:00
} ;
/*
 * filldir callback used by nfsd_buffered_readdir(): append one entry to
 * the staging page described by __buf (a struct readdir_data).
 *
 * Returns 0 when the entry was stored.  When the record would run past
 * PAGE_SIZE the buffer is flagged full and -EINVAL is returned without
 * storing anything.
 */
static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen,
				 loff_t offset, u64 ino, unsigned int d_type)
{
	struct readdir_data *rd = __buf;
	struct buffered_dirent *slot;
	unsigned int need;

	/* records are padded so the next one starts u64-aligned */
	need = ALIGN(sizeof(struct buffered_dirent) + namlen, sizeof(u64));
	if (rd->used + need > PAGE_SIZE) {
		rd->full = 1;
		return -EINVAL;
	}

	slot = (void *)(rd->dirent + rd->used);
	slot->namlen = namlen;
	slot->offset = offset;
	slot->ino = ino;
	slot->d_type = d_type;
	memcpy(slot->name, name, namlen);

	rd->used += need;
	return 0;
}
2009-04-21 02:18:37 +04:00
/*
 * Read a directory in page-sized batches and feed the buffered entries to
 * func.
 *
 * Entries are first collected by nfsd_buffered_filldir() into a temporary
 * page, then replayed through func with the directory's i_mutex held.
 * On success *offsetp is updated to the last offset reached and cdp->err
 * is returned; a host error is returned through nfserrno() and leaves
 * *offsetp untouched.
 */
static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func,
				    struct readdir_cd *cdp, loff_t *offsetp)
{
	struct readdir_data batch;
	struct buffered_dirent *entry;
	struct inode *dir_inode;
	loff_t pos;
	int host_err;
	int remaining;

	batch.dirent = (void *)__get_free_page(GFP_KERNEL);
	if (!batch.dirent)
		return nfserrno(-ENOMEM);

	pos = *offsetp;
	dir_inode = file->f_path.dentry->d_inode;

	for (;;) {
		unsigned int reclen;

		/*
		 * cdp->err is reset to eof on every pass and the loop only
		 * ends on an empty batch, never merely because the buffer
		 * did not fill.  Per the originating commit, ext3 without
		 * dir_index returns from readdir after each block, so a
		 * non-full buffer does not imply the directory is exhausted;
		 * treating it that way truncated READDIR results.
		 */
		cdp->err = nfserr_eof; /* will be cleared on successful read */
		batch.used = 0;
		batch.full = 0;

		host_err = vfs_readdir(file, nfsd_buffered_filldir, &batch);
		/* the -EINVAL produced by a full buffer is not a real error */
		if (batch.full)
			host_err = 0;
		if (host_err < 0)
			break;

		remaining = batch.used;
		if (!remaining)
			break;

		/*
		 * Various filldir functions may end up calling back into
		 * lookup_one_len() and the file system's ->lookup() method.
		 * These expect i_mutex to be held, as it would within readdir.
		 */
		host_err = mutex_lock_killable(&dir_inode->i_mutex);
		if (host_err)
			break;

		entry = (struct buffered_dirent *)batch.dirent;
		while (remaining > 0) {
			pos = entry->offset;

			if (func(cdp, entry->name, entry->namlen, entry->offset,
				 entry->ino, entry->d_type))
				break;
			if (cdp->err != nfs_ok)
				break;

			reclen = ALIGN(sizeof(*entry) + entry->namlen,
				       sizeof(u64));
			remaining -= reclen;
			entry = (struct buffered_dirent *)((char *)entry + reclen);
		}
		mutex_unlock(&dir_inode->i_mutex);

		if (remaining > 0) /* We bailed out early */
			break;

		pos = vfs_llseek(file, 0, SEEK_CUR);
	}

	free_page((unsigned long)(batch.dirent));

	if (host_err)
		return nfserrno(host_err);

	*offsetp = pos;
	return cdp->err;
}
2005-04-17 02:20:36 +04:00
/*
* Read entries from a directory .
* The NFSv3 / 4 verifier we ignore for now .
*/
2006-10-20 10:28:58 +04:00
__be32
2005-04-17 02:20:36 +04:00
nfsd_readdir ( struct svc_rqst * rqstp , struct svc_fh * fhp , loff_t * offsetp ,
2007-01-26 11:57:10 +03:00
struct readdir_cd * cdp , filldir_t func )
2005-04-17 02:20:36 +04:00
{
2006-10-20 10:28:58 +04:00
__be32 err ;
2005-04-17 02:20:36 +04:00
struct file * file ;
loff_t offset = * offsetp ;
2008-06-16 15:20:29 +04:00
err = nfsd_open ( rqstp , fhp , S_IFDIR , NFSD_MAY_READ , & file ) ;
2005-04-17 02:20:36 +04:00
if ( err )
goto out ;
offset = vfs_llseek ( file , offset , 0 ) ;
if ( offset < 0 ) {
err = nfserrno ( ( int ) offset ) ;
goto out_close ;
}
2008-07-31 23:29:12 +04:00
err = nfsd_buffered_readdir ( file , func , cdp , offsetp ) ;
2005-04-17 02:20:36 +04:00
if ( err = = nfserr_eof | | err = = nfserr_toosmall )
err = nfs_ok ; /* can still be found in ->err */
out_close :
nfsd_close ( file ) ;
out :
return err ;
}
/*
* Get file system stats
* N . B . After this call fhp needs an fh_put
*/
2006-10-20 10:28:58 +04:00
__be32
2008-08-07 21:00:20 +04:00
nfsd_statfs ( struct svc_rqst * rqstp , struct svc_fh * fhp , struct kstatfs * stat , int access )
2005-04-17 02:20:36 +04:00
{
2008-08-07 21:00:20 +04:00
__be32 err = fh_verify ( rqstp , fhp , 0 , NFSD_MAY_NOP | access ) ;
2006-06-23 13:02:58 +04:00
if ( ! err & & vfs_statfs ( fhp - > fh_dentry , stat ) )
2005-04-17 02:20:36 +04:00
err = nfserr_io ;
return err ;
}
2007-07-19 12:49:20 +04:00
static int exp_rdonly ( struct svc_rqst * rqstp , struct svc_export * exp )
2007-07-19 12:49:20 +04:00
{
2007-07-19 12:49:20 +04:00
return nfsexp_flags ( rqstp , exp ) & NFSEXP_READONLY ;
2007-07-19 12:49:20 +04:00
}
2005-04-17 02:20:36 +04:00
/*
* Check for a user ' s access permissions to this inode .
*/
2006-10-20 10:28:58 +04:00
__be32
2007-07-17 15:04:48 +04:00
nfsd_permission ( struct svc_rqst * rqstp , struct svc_export * exp ,
struct dentry * dentry , int acc )
2005-04-17 02:20:36 +04:00
{
struct inode * inode = dentry - > d_inode ;
int err ;
2008-06-16 15:20:29 +04:00
if ( acc = = NFSD_MAY_NOP )
2005-04-17 02:20:36 +04:00
return 0 ;
#if 0
dprintk ( " nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s \n " ,
acc ,
2008-06-16 15:20:29 +04:00
( acc & NFSD_MAY_READ ) ? " read " : " " ,
( acc & NFSD_MAY_WRITE ) ? " write " : " " ,
( acc & NFSD_MAY_EXEC ) ? " exec " : " " ,
( acc & NFSD_MAY_SATTR ) ? " sattr " : " " ,
( acc & NFSD_MAY_TRUNC ) ? " trunc " : " " ,
( acc & NFSD_MAY_LOCK ) ? " lock " : " " ,
( acc & NFSD_MAY_OWNER_OVERRIDE ) ? " owneroverride " : " " ,
2005-04-17 02:20:36 +04:00
inode - > i_mode ,
IS_IMMUTABLE ( inode ) ? " immut " : " " ,
IS_APPEND ( inode ) ? " append " : " " ,
2008-02-16 01:37:56 +03:00
__mnt_is_readonly ( exp - > ex_path . mnt ) ? " ro " : " " ) ;
2005-04-17 02:20:36 +04:00
dprintk ( " owner %d/%d user %d/%d \n " ,
2008-11-14 02:38:58 +03:00
inode - > i_uid , inode - > i_gid , current_fsuid ( ) , current_fsgid ( ) ) ;
2005-04-17 02:20:36 +04:00
# endif
/* Normally we reject any write/sattr etc access on a read-only file
* system . But if it is IRIX doing check on write - access for a
* device special file , we ignore rofs .
*/
2008-06-16 15:20:29 +04:00
if ( ! ( acc & NFSD_MAY_LOCAL_ACCESS ) )
if ( acc & ( NFSD_MAY_WRITE | NFSD_MAY_SATTR | NFSD_MAY_TRUNC ) ) {
2008-02-16 01:37:56 +03:00
if ( exp_rdonly ( rqstp , exp ) | |
__mnt_is_readonly ( exp - > ex_path . mnt ) )
2005-04-17 02:20:36 +04:00
return nfserr_rofs ;
2008-06-16 15:20:29 +04:00
if ( /* (acc & NFSD_MAY_WRITE) && */ IS_IMMUTABLE ( inode ) )
2005-04-17 02:20:36 +04:00
return nfserr_perm ;
}
2008-06-16 15:20:29 +04:00
if ( ( acc & NFSD_MAY_TRUNC ) & & IS_APPEND ( inode ) )
2005-04-17 02:20:36 +04:00
return nfserr_perm ;
2008-06-16 15:20:29 +04:00
if ( acc & NFSD_MAY_LOCK ) {
2005-04-17 02:20:36 +04:00
/* If we cannot rely on authentication in NLM requests,
* just allow locks , otherwise require read permission , or
* ownership
*/
if ( exp - > ex_flags & NFSEXP_NOAUTHNLM )
return 0 ;
else
2008-06-16 15:20:29 +04:00
acc = NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE ;
2005-04-17 02:20:36 +04:00
}
/*
* The file owner always gets access permission for accesses that
* would normally be checked at open time . This is to make
* file access work even when the client has done a fchmod ( fd , 0 ) .
*
* However , ` cp foo bar ' should fail nevertheless when bar is
* readonly . A sensible way to do this might be to reject all
* attempts to truncate a read - only file , because a creat ( ) call
* always implies file truncation .
* . . . but this isn ' t really fair . A process may reasonably call
* ftruncate on an open file descriptor on a file with perm 000.
* We must trust the client to do permission checking - using " ACCESS "
* with NFSv3 .
*/
2008-06-16 15:20:29 +04:00
if ( ( acc & NFSD_MAY_OWNER_OVERRIDE ) & &
2008-11-14 02:38:58 +03:00
inode - > i_uid = = current_fsuid ( ) )
2005-04-17 02:20:36 +04:00
return 0 ;
2008-06-16 15:20:29 +04:00
/* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */
2008-07-22 08:07:17 +04:00
err = inode_permission ( inode , acc & ( MAY_READ | MAY_WRITE | MAY_EXEC ) ) ;
2005-04-17 02:20:36 +04:00
/* Allow read access to binaries even when mode 111 */
if ( err = = - EACCES & & S_ISREG ( inode - > i_mode ) & &
2008-06-16 15:20:29 +04:00
acc = = ( NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE ) )
2008-07-22 08:07:17 +04:00
err = inode_permission ( inode , MAY_EXEC ) ;
2005-04-17 02:20:36 +04:00
return err ? nfserrno ( err ) : 0 ;
}
void
nfsd_racache_shutdown ( void )
{
2008-08-14 06:03:27 +04:00
struct raparms * raparm , * last_raparm ;
unsigned int i ;
2005-04-17 02:20:36 +04:00
dprintk ( " nfsd: freeing readahead buffers. \n " ) ;
2008-08-14 06:03:27 +04:00
for ( i = 0 ; i < RAPARM_HASH_SIZE ; i + + ) {
raparm = raparm_hash [ i ] . pb_head ;
while ( raparm ) {
last_raparm = raparm ;
raparm = raparm - > p_next ;
kfree ( last_raparm ) ;
}
raparm_hash [ i ] . pb_head = NULL ;
}
2005-04-17 02:20:36 +04:00
}
/*
* Initialize readahead param cache
*/
int
nfsd_racache_init ( int cache_size )
{
int i ;
2006-10-04 13:15:49 +04:00
int j = 0 ;
int nperbucket ;
2008-08-14 06:03:27 +04:00
struct raparms * * raparm = NULL ;
2005-04-17 02:20:36 +04:00
2006-10-04 13:15:49 +04:00
2008-08-14 06:03:27 +04:00
if ( raparm_hash [ 0 ] . pb_head )
2005-04-17 02:20:36 +04:00
return 0 ;
2008-08-14 06:03:27 +04:00
nperbucket = DIV_ROUND_UP ( cache_size , RAPARM_HASH_SIZE ) ;
if ( nperbucket < 2 )
nperbucket = 2 ;
cache_size = nperbucket * RAPARM_HASH_SIZE ;
2006-12-08 13:39:41 +03:00
dprintk ( " nfsd: allocating %d readahead buffers. \n " , cache_size ) ;
2008-08-14 06:03:27 +04:00
for ( i = 0 ; i < RAPARM_HASH_SIZE ; i + + ) {
2006-12-08 13:39:41 +03:00
spin_lock_init ( & raparm_hash [ i ] . pb_lock ) ;
2008-08-14 06:03:27 +04:00
raparm = & raparm_hash [ i ] . pb_head ;
for ( j = 0 ; j < nperbucket ; j + + ) {
* raparm = kzalloc ( sizeof ( struct raparms ) , GFP_KERNEL ) ;
if ( ! * raparm )
goto out_nomem ;
raparm = & ( * raparm ) - > p_next ;
}
* raparm = NULL ;
2006-12-08 13:39:41 +03:00
}
2005-04-17 02:20:36 +04:00
nfsdstats . ra_size = cache_size ;
return 0 ;
2008-08-14 06:03:27 +04:00
out_nomem :
dprintk ( " nfsd: kmalloc failed, freeing readahead buffers \n " ) ;
nfsd_racache_shutdown ( ) ;
return - ENOMEM ;
2005-04-17 02:20:36 +04:00
}
2005-06-22 21:16:26 +04:00
# if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
struct posix_acl *
nfsd_get_posix_acl ( struct svc_fh * fhp , int type )
{
struct inode * inode = fhp - > fh_dentry - > d_inode ;
char * name ;
void * value = NULL ;
ssize_t size ;
struct posix_acl * acl ;
2006-01-10 07:51:55 +03:00
if ( ! IS_POSIXACL ( inode ) )
return ERR_PTR ( - EOPNOTSUPP ) ;
switch ( type ) {
case ACL_TYPE_ACCESS :
name = POSIX_ACL_XATTR_ACCESS ;
break ;
case ACL_TYPE_DEFAULT :
name = POSIX_ACL_XATTR_DEFAULT ;
break ;
default :
2005-06-22 21:16:26 +04:00
return ERR_PTR ( - EOPNOTSUPP ) ;
}
2006-01-10 07:51:55 +03:00
size = nfsd_getxattr ( fhp - > fh_dentry , name , & value ) ;
if ( size < 0 )
return ERR_PTR ( size ) ;
2005-06-22 21:16:26 +04:00
acl = posix_acl_from_xattr ( value , size ) ;
kfree ( value ) ;
return acl ;
}
int
nfsd_set_posix_acl ( struct svc_fh * fhp , int type , struct posix_acl * acl )
{
struct inode * inode = fhp - > fh_dentry - > d_inode ;
char * name ;
void * value = NULL ;
size_t size ;
int error ;
2008-12-04 18:06:33 +03:00
if ( ! IS_POSIXACL ( inode ) | |
2005-06-22 21:16:26 +04:00
! inode - > i_op - > setxattr | | ! inode - > i_op - > removexattr )
return - EOPNOTSUPP ;
switch ( type ) {
case ACL_TYPE_ACCESS :
2005-06-29 07:44:58 +04:00
name = POSIX_ACL_XATTR_ACCESS ;
2005-06-22 21:16:26 +04:00
break ;
case ACL_TYPE_DEFAULT :
2005-06-29 07:44:58 +04:00
name = POSIX_ACL_XATTR_DEFAULT ;
2005-06-22 21:16:26 +04:00
break ;
default :
return - EOPNOTSUPP ;
}
if ( acl & & acl - > a_count ) {
2005-06-29 07:44:58 +04:00
size = posix_acl_xattr_size ( acl - > a_count ) ;
2005-06-22 21:16:26 +04:00
value = kmalloc ( size , GFP_KERNEL ) ;
if ( ! value )
return - ENOMEM ;
2006-05-21 01:59:58 +04:00
error = posix_acl_to_xattr ( acl , value , size ) ;
if ( error < 0 )
2005-06-22 21:16:26 +04:00
goto getout ;
2006-05-21 01:59:58 +04:00
size = error ;
2005-06-22 21:16:26 +04:00
} else
size = 0 ;
2008-02-16 01:37:38 +03:00
error = mnt_want_write ( fhp - > fh_export - > ex_path . mnt ) ;
if ( error )
goto getout ;
2005-06-22 21:16:26 +04:00
if ( size )
2006-01-10 07:51:55 +03:00
error = vfs_setxattr ( fhp - > fh_dentry , name , value , size , 0 ) ;
2005-06-22 21:16:26 +04:00
else {
if ( ! S_ISDIR ( inode - > i_mode ) & & type = = ACL_TYPE_DEFAULT )
error = 0 ;
else {
2006-01-10 07:51:55 +03:00
error = vfs_removexattr ( fhp - > fh_dentry , name ) ;
2005-06-22 21:16:26 +04:00
if ( error = = - ENODATA )
error = 0 ;
}
}
2008-02-16 01:37:38 +03:00
mnt_drop_write ( fhp - > fh_export - > ex_path . mnt ) ;
2005-06-22 21:16:26 +04:00
getout :
kfree ( value ) ;
return error ;
}
# endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */