2005-04-16 15:20:36 -07:00
/*
2007-09-20 17:31:38 +09:00
* fs / sysfs / inode . c - basic sysfs inode and dentry operations
2005-04-16 15:20:36 -07:00
*
2007-09-20 17:31:38 +09:00
* Copyright ( c ) 2001 - 3 Patrick Mochel
* Copyright ( c ) 2007 SUSE Linux Products GmbH
* Copyright ( c ) 2007 Tejun Heo < teheo @ suse . de >
*
* This file is released under the GPLv2 .
2005-04-16 15:20:36 -07:00
*
* Please see Documentation / filesystems / sysfs . txt for more information .
*/
# undef DEBUG
# include <linux/pagemap.h>
# include <linux/namei.h>
# include <linux/backing-dev.h>
2006-01-11 12:17:46 -08:00
# include <linux/capability.h>
2006-07-10 23:05:25 -07:00
# include <linux/errno.h>
Detach sched.h from mm.h
The first thing mm.h does is include sched.h, solely for the can_do_mlock()
inline function, which dereferences "current". By dealing with can_do_mlock(),
mm.h can be detached from sched.h, which is good. See below for why.
This patch
a) removes unconditional inclusion of sched.h from mm.h
b) makes can_do_mlock() normal function in mm/mlock.c
c) exports can_do_mlock() to not break compilation
d) adds sched.h inclusions back to files that were getting it indirectly.
e) adds less bloated headers to some files (asm/signal.h, jiffies.h) that were
getting them indirectly
Net result is:
a) mm.h users would get less code to open, read, preprocess, parse, ... if
they don't need sched.h
b) sched.h stops being dependency for significant number of files:
on x86_64 allmodconfig touching sched.h results in recompile of 4083 files,
after patch it's only 3744 (-8.3%).
Cross-compile tested on
all arm defconfigs, all mips defconfigs, all powerpc defconfigs,
alpha alpha-up
arm
i386 i386-up i386-defconfig i386-allnoconfig
ia64 ia64-up
m68k
mips
parisc parisc-up
powerpc powerpc-up
s390 s390-up
sparc sparc-up
sparc64 sparc64-up
um-x86_64
x86_64 x86_64-up x86_64-defconfig x86_64-allnoconfig
as well as my two usual configs.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2007-05-21 01:22:52 +04:00
# include <linux/sched.h>
2005-04-16 15:20:36 -07:00
# include "sysfs.h"
extern struct super_block * sysfs_sb ;
2006-06-28 04:26:44 -07:00
static const struct address_space_operations sysfs_aops = {
2005-04-16 15:20:36 -07:00
. readpage = simple_readpage ,
2007-10-16 01:25:03 -07:00
. write_begin = simple_write_begin ,
. write_end = simple_write_end ,
2005-04-16 15:20:36 -07:00
} ;
static struct backing_dev_info sysfs_backing_dev_info = {
. ra_pages = 0 , /* No readahead */
2008-04-30 00:54:37 -07:00
. capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK ,
2005-04-16 15:20:36 -07:00
} ;
2007-02-12 00:55:40 -08:00
static const struct inode_operations sysfs_inode_operations = {
2005-05-31 10:39:14 +05:30
. setattr = sysfs_setattr ,
} ;
2007-10-16 23:25:46 -07:00
/**
 *	sysfs_inode_init - one-time setup for sysfs inode handling
 *
 *	Initializes the backing_dev_info shared by all sysfs inodes.
 *
 *	RETURNS:
 *	Whatever bdi_init() returns for sysfs_backing_dev_info.
 */
int __init sysfs_inode_init(void)
{
	int err = bdi_init(&sysfs_backing_dev_info);

	return err;
}
2005-05-31 10:39:14 +05:30
int sysfs_setattr ( struct dentry * dentry , struct iattr * iattr )
{
struct inode * inode = dentry - > d_inode ;
struct sysfs_dirent * sd = dentry - > d_fsdata ;
struct iattr * sd_iattr ;
unsigned int ia_valid = iattr - > ia_valid ;
int error ;
if ( ! sd )
return - EINVAL ;
sd_iattr = sd - > s_iattr ;
error = inode_change_ok ( inode , iattr ) ;
if ( error )
return error ;
sysfs: Disallow truncation of files in sysfs
sysfs allows attribute files to be truncated, e.g. using ftruncate(), with the
expected effect on their inode. For most attributes, this doesn't change the
"real" size of the file i.e. how much can be read from it. However, the
parameter validation for reading and writing binary attribute files is based
on the inode size and not the size specified in the file's bin_attribute, so it
can be broken by this. For example, if we try using dd to write to such a file:
# pwd
/sys/bus/pci/devices/0000:08:00.0
# ls -l config
-rw-r--r-- 1 root root 4096 Feb 1 17:35 config
# dd if=/dev/zero of=config bs=4 count=1
1+0 records in
1+0 records out
# ls -l config
-rw-r--r-- 1 root root 0 Feb 1 17:50 config
# dd if=/dev/zero of=config bs=4 count=1 seek=128
dd: writing `config': No space left on device
1+0 records in
0+0 records out
Also, after truncation to 0, parameter validation for read and write is
disabled. Most bin_attribute read and write methods also validate the size and
offset, but for some this will allow out-of-range access. This may be a
security issue, though access to such files is often limited to root. In any
case, the validation should remain for safety's sake!)
This was previously reported in Bugzilla as bug 9867.
sysfs should ignore size changes or else refuse them (by returning -EINVAL).
This patch makes it ignore them.
Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
2008-04-28 15:59:58 +01:00
iattr - > ia_valid & = ~ ATTR_SIZE ; /* ignore size changes */
2005-05-31 10:39:14 +05:30
error = inode_setattr ( inode , iattr ) ;
if ( error )
return error ;
if ( ! sd_iattr ) {
/* setting attributes for the first time, allocate now */
2006-02-22 11:18:15 +01:00
sd_iattr = kzalloc ( sizeof ( struct iattr ) , GFP_KERNEL ) ;
2005-05-31 10:39:14 +05:30
if ( ! sd_iattr )
return - ENOMEM ;
/* assign default attributes */
sd_iattr - > ia_mode = sd - > s_mode ;
sd_iattr - > ia_uid = 0 ;
sd_iattr - > ia_gid = 0 ;
sd_iattr - > ia_atime = sd_iattr - > ia_mtime = sd_iattr - > ia_ctime = CURRENT_TIME ;
sd - > s_iattr = sd_iattr ;
}
/* attributes were changed atleast once in past */
if ( ia_valid & ATTR_UID )
sd_iattr - > ia_uid = iattr - > ia_uid ;
if ( ia_valid & ATTR_GID )
sd_iattr - > ia_gid = iattr - > ia_gid ;
if ( ia_valid & ATTR_ATIME )
sd_iattr - > ia_atime = timespec_trunc ( iattr - > ia_atime ,
inode - > i_sb - > s_time_gran ) ;
if ( ia_valid & ATTR_MTIME )
sd_iattr - > ia_mtime = timespec_trunc ( iattr - > ia_mtime ,
inode - > i_sb - > s_time_gran ) ;
if ( ia_valid & ATTR_CTIME )
sd_iattr - > ia_ctime = timespec_trunc ( iattr - > ia_ctime ,
inode - > i_sb - > s_time_gran ) ;
if ( ia_valid & ATTR_MODE ) {
umode_t mode = iattr - > ia_mode ;
if ( ! in_group_p ( inode - > i_gid ) & & ! capable ( CAP_FSETID ) )
mode & = ~ S_ISGID ;
2005-07-29 12:14:19 -07:00
sd_iattr - > ia_mode = sd - > s_mode = mode ;
2005-05-31 10:39:14 +05:30
}
return error ;
}
2005-05-31 10:39:52 +05:30
/*
 * Give @inode the default attributes: the supplied @mode, with the
 * access, modification and change times all set to the same
 * CURRENT_TIME value.
 */
static inline void set_default_inode_attr(struct inode *inode, mode_t mode)
{
	inode->i_mode = mode;

	/* sample the clock once and propagate it, as a chained assignment would */
	inode->i_ctime = CURRENT_TIME;
	inode->i_mtime = inode->i_ctime;
	inode->i_atime = inode->i_mtime;
}
static inline void set_inode_attr ( struct inode * inode , struct iattr * iattr )
{
inode - > i_mode = iattr - > ia_mode ;
inode - > i_uid = iattr - > ia_uid ;
inode - > i_gid = iattr - > ia_gid ;
inode - > i_atime = iattr - > ia_atime ;
inode - > i_mtime = iattr - > ia_mtime ;
inode - > i_ctime = iattr - > ia_ctime ;
}
2006-07-12 09:03:06 -07:00
/*
 * sysfs has different i_mutex lock-ordering behavior than other
 * filesystems: sysfs takes i_mutex in many places with subsystem locks
 * held.  At the same time, many of the VFS locking rules do not apply to
 * sysfs at all (cross-directory rename, for example).  To untangle this
 * mess (which gives false positives in lockdep), sysfs inodes are given
 * their own lock class for i_mutex.
 */
static struct lock_class_key sysfs_inode_imutex_key ;
2007-08-20 21:36:29 +09:00
static int sysfs_count_nlink ( struct sysfs_dirent * sd )
{
struct sysfs_dirent * child ;
int nr = 0 ;
2007-09-20 16:05:12 +09:00
for ( child = sd - > s_dir . children ; child ; child = child - > s_sibling )
2007-08-20 21:36:29 +09:00
if ( sysfs_type ( child ) = = SYSFS_DIR )
nr + + ;
return nr + 2 ;
}
2007-07-18 14:30:28 +09:00
static void sysfs_init_inode ( struct sysfs_dirent * sd , struct inode * inode )
2005-04-16 15:20:36 -07:00
{
2007-08-20 21:36:29 +09:00
struct bin_attribute * bin_attr ;
2009-02-11 13:20:23 -08:00
inode - > i_private = sysfs_get ( sd ) ;
2007-06-14 03:45:17 +09:00
inode - > i_mapping - > a_ops = & sysfs_aops ;
inode - > i_mapping - > backing_dev_info = & sysfs_backing_dev_info ;
inode - > i_op = & sysfs_inode_operations ;
inode - > i_ino = sd - > s_ino ;
lockdep_set_class ( & inode - > i_mutex , & sysfs_inode_imutex_key ) ;
if ( sd - > s_iattr ) {
/* sysfs_dirent has non-default attributes
* get them for the new inode from persistent copy
* in sysfs_dirent
*/
set_inode_attr ( inode , sd - > s_iattr ) ;
} else
set_default_inode_attr ( inode , sd - > s_mode ) ;
2007-08-20 21:36:29 +09:00
/* initialize inode according to type */
switch ( sysfs_type ( sd ) ) {
case SYSFS_DIR :
inode - > i_op = & sysfs_dir_inode_operations ;
inode - > i_fop = & sysfs_dir_operations ;
inode - > i_nlink = sysfs_count_nlink ( sd ) ;
break ;
case SYSFS_KOBJ_ATTR :
inode - > i_size = PAGE_SIZE ;
inode - > i_fop = & sysfs_file_operations ;
break ;
case SYSFS_KOBJ_BIN_ATTR :
2007-09-20 16:05:11 +09:00
bin_attr = sd - > s_bin_attr . bin_attr ;
2007-08-20 21:36:29 +09:00
inode - > i_size = bin_attr - > size ;
inode - > i_fop = & bin_fops ;
break ;
case SYSFS_KOBJ_LINK :
inode - > i_op = & sysfs_symlink_inode_operations ;
break ;
default :
BUG ( ) ;
}
unlock_new_inode ( inode ) ;
2007-06-14 03:45:17 +09:00
}
/**
2007-06-14 03:45:17 +09:00
* sysfs_get_inode - get inode for sysfs_dirent
2007-06-14 03:45:17 +09:00
* @ sd : sysfs_dirent to allocate inode for
*
2007-06-14 03:45:17 +09:00
* Get inode for @ sd . If such inode doesn ' t exist , a new inode
* is allocated and basics are initialized . New inode is
* returned locked .
2007-06-14 03:45:17 +09:00
*
* LOCKING :
* Kernel thread context ( may sleep ) .
*
* RETURNS :
* Pointer to allocated inode on success , NULL on failure .
*/
2007-06-14 03:45:17 +09:00
struct inode * sysfs_get_inode ( struct sysfs_dirent * sd )
2007-06-14 03:45:17 +09:00
{
struct inode * inode ;
2007-06-14 03:45:17 +09:00
inode = iget_locked ( sysfs_sb , sd - > s_ino ) ;
if ( inode & & ( inode - > i_state & I_NEW ) )
2007-06-14 03:45:17 +09:00
sysfs_init_inode ( sd , inode ) ;
2005-04-16 15:20:36 -07:00
return inode ;
}
2009-02-11 13:20:23 -08:00
/*
* The sysfs_dirent serves as both an inode and a directory entry for sysfs .
* To prevent the sysfs inode numbers from being freed prematurely we take a
* reference to sysfs_dirent from the sysfs inode . A
* super_operations . delete_inode ( ) implementation is needed to drop that
* reference upon inode destruction .
*/
void sysfs_delete_inode ( struct inode * inode )
{
struct sysfs_dirent * sd = inode - > i_private ;
truncate_inode_pages ( & inode - > i_data , 0 ) ;
clear_inode ( inode ) ;
sysfs_put ( sd ) ;
}
2007-06-14 04:27:22 +09:00
int sysfs_hash_and_remove ( struct sysfs_dirent * dir_sd , const char * name )
2005-04-16 15:20:36 -07:00
{
2007-06-14 04:27:24 +09:00
struct sysfs_addrm_cxt acxt ;
2007-08-02 21:38:03 +09:00
struct sysfs_dirent * sd ;
2006-03-16 15:44:26 -08:00
2007-06-14 04:27:22 +09:00
if ( ! dir_sd )
2006-07-10 23:05:25 -07:00
return - ENOENT ;
2005-04-16 15:20:36 -07:00
2007-06-14 04:27:24 +09:00
sysfs_addrm_start ( & acxt , dir_sd ) ;
2007-06-14 04:27:22 +09:00
2007-08-02 21:38:03 +09:00
sd = sysfs_find_dirent ( dir_sd , name ) ;
if ( sd )
sysfs_remove_one ( & acxt , sd ) ;
2007-06-14 04:27:23 +09:00
2007-08-02 21:38:03 +09:00
sysfs_addrm_finish ( & acxt ) ;
if ( sd )
2007-06-14 04:27:24 +09:00
return 0 ;
2007-08-02 21:38:03 +09:00
else
return - ENOENT ;
2005-04-16 15:20:36 -07:00
}