2005-12-15 14:29:43 -08:00
/* -*- mode: c; c-basic-offset:8; -*-
* vim : noexpandtab sw = 8 ts = 8 sts = 0 :
*
* configfs_internal . h - Internal stuff for configfs
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation ; either
* version 2 of the License , or ( at your option ) any later version .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the GNU
* General Public License for more details .
*
* You should have received a copy of the GNU General Public
* License along with this program ; if not , write to the
* Free Software Foundation , Inc . , 59 Temple Place - Suite 330 ,
* Boston , MA 02111 - 1307 , USA .
*
* Based on sysfs :
* sysfs is Copyright ( C ) 2001 , 2002 , 2003 Patrick Mochel
*
* configfs Copyright ( C ) 2005 Oracle . All rights reserved .
*/
# include <linux/slab.h>
# include <linux/list.h>
2008-06-16 19:00:58 +02:00
# include <linux/spinlock.h>
2005-12-15 14:29:43 -08:00
/*
 * struct configfs_dirent - bookkeeping node configfs hangs off a dentry
 * (reached through dentry->d_fsdata in the helpers further down this file).
 *
 * s_count            atomic reference count. configfs_get() increments it,
 *                    configfs_put() decrements it and calls
 *                    release_configfs_dirent() when it reaches zero.
 * s_dependent_count  NOTE(review): presumably the number of outstanding
 *                    configfs_depend_item() calls on this item - confirm
 *                    against dir.c.
 * s_sibling,
 * s_children,
 * s_links            list heads. NOTE(review): presumably linkage into the
 *                    parent, the list of child dirents, and the list of
 *                    symlinks respectively - confirm against dir.c/symlink.c.
 * s_element          untyped payload. to_item() reads it as a
 *                    struct config_item, to_attr() as a
 *                    struct configfs_attribute, and
 *                    configfs_get_config_item() as a struct configfs_symlink
 *                    when s_type has CONFIGFS_ITEM_LINK set.
 * s_type             bitmask of the CONFIGFS_* flags defined after this
 *                    struct (tested with & in this file).
 * s_mode             umode_t value. NOTE(review): presumably the file mode of
 *                    the entry - confirm.
 * s_dentry           dentry pointer stored in the dirent.
 * s_iattr            released with kfree() by release_configfs_dirent().
 * s_depth            exists only when CONFIG_LOCKDEP is defined.
 *                    NOTE(review): presumably the nesting depth used to pick
 *                    a lockdep class - confirm.
 */
struct configfs_dirent {
atomic_t s_count ;
configfs: config item dependancies.
Sometimes other drivers depend on particular configfs items. For
example, ocfs2 mounts depend on a heartbeat region item. If that
region item is removed with rmdir(2), the ocfs2 mount must BUG or go
readonly. Not happy.
This provides two additional API calls: configfs_depend_item() and
configfs_undepend_item(). A client driver can call
configfs_depend_item() on an existing item to tell configfs that it is
depended on. configfs will then return -EBUSY from rmdir(2) for that
item. When the item is no longer depended on, the client driver calls
configfs_undepend_item() on it.
These API cannot be called underneath any configfs callbacks, as
they will conflict. They can block and allocate. A client driver
probably shouldn't calling them of its own gumption. Rather it should
be providing an API that external subsystems call.
How does this work? Imagine the ocfs2 mount process. When it mounts,
it asks for a heart region item. This is done via a call into the
heartbeat code. Inside the heartbeat code, the region item is looked
up. Here, the heartbeat code calls configfs_depend_item(). If it
succeeds, then heartbeat knows the region is safe to give to ocfs2.
If it fails, it was being torn down anyway, and heartbeat can gracefully
pass up an error.
[ Fixed some bad whitespace in configfs.txt. --Mark ]
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
2007-06-18 18:06:09 -07:00
int s_dependent_count ;
2005-12-15 14:29:43 -08:00
struct list_head s_sibling ;
struct list_head s_children ;
struct list_head s_links ;
configfs: config item dependancies.
Sometimes other drivers depend on particular configfs items. For
example, ocfs2 mounts depend on a heartbeat region item. If that
region item is removed with rmdir(2), the ocfs2 mount must BUG or go
readonly. Not happy.
This provides two additional API calls: configfs_depend_item() and
configfs_undepend_item(). A client driver can call
configfs_depend_item() on an existing item to tell configfs that it is
depended on. configfs will then return -EBUSY from rmdir(2) for that
item. When the item is no longer depended on, the client driver calls
configfs_undepend_item() on it.
These API cannot be called underneath any configfs callbacks, as
they will conflict. They can block and allocate. A client driver
probably shouldn't calling them of its own gumption. Rather it should
be providing an API that external subsystems call.
How does this work? Imagine the ocfs2 mount process. When it mounts,
it asks for a heart region item. This is done via a call into the
heartbeat code. Inside the heartbeat code, the region item is looked
up. Here, the heartbeat code calls configfs_depend_item(). If it
succeeds, then heartbeat knows the region is safe to give to ocfs2.
If it fails, it was being torn down anyway, and heartbeat can gracefully
pass up an error.
[ Fixed some bad whitespace in configfs.txt. --Mark ]
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
2007-06-18 18:06:09 -07:00
void * s_element ;
2005-12-15 14:29:43 -08:00
int s_type ;
umode_t s_mode ;
struct dentry * s_dentry ;
2006-01-25 13:31:07 -08:00
struct iattr * s_iattr ;
configfs: Silence lockdep on mkdir() and rmdir()
When attaching default groups (subdirs) of a new group (in mkdir() or
in configfs_register()), configfs recursively takes inode's mutexes
along the path from the parent of the new group to the default
subdirs. This is needed to ensure that the VFS will not race with
operations on these sub-dirs. This is safe for the following reasons:
- the VFS allows one to lock first an inode and second one of its
children (The lock subclasses for this pattern are respectively
I_MUTEX_PARENT and I_MUTEX_CHILD);
- from this rule any inode path can be recursively locked in
descending order as long as it stays under a single mountpoint and
does not follow symlinks.
Unfortunately lockdep does not know (yet?) how to handle such
recursion.
I've tried to use Peter Zijlstra's lock_set_subclass() helper to
upgrade i_mutexes from I_MUTEX_CHILD to I_MUTEX_PARENT when we know
that we might recursively lock some of their descendant, but this
usage does not seem to fit the purpose of lock_set_subclass() because
it leads to several i_mutex locked with subclass I_MUTEX_PARENT by
the same task.
>From inside configfs it is not possible to serialize those recursive
locking with a top-level one, because mkdir() and rmdir() are already
called with inodes locked by the VFS. So using some
mutex_lock_nest_lock() is not an option.
I am proposing two solutions:
1) one that wraps recursive mutex_lock()s with
lockdep_off()/lockdep_on().
2) (as suggested earlier by Peter Zijlstra) one that puts the
i_mutexes recursively locked in different classes based on their
depth from the top-level config_group created. This
induces an arbitrary limit (MAX_LOCK_DEPTH - 2 == 46) on the
nesting of configfs default groups whenever lockdep is activated
but this limit looks reasonably high. Unfortunately, this also
isolates VFS operations on configfs default groups from the others
and thus lowers the chances to detect locking issues.
Nobody likes solution 1), which I can understand.
This patch implements solution 2). However lockdep is still not happy with
configfs_depend_item(). Next patch reworks the locking of
configfs_depend_item() and finally makes lockdep happy.
[ Note: This hides a few locking interactions with the VFS from lockdep.
That was my big concern, because we like lockdep's protection. However,
the current state always dumps a spurious warning. The locking is
correct, so I tell people to ignore the warning and that we'll keep
our eyes on the locking to make sure it stays correct. With this patch,
we eliminate the warning. We do lose some of the lockdep protections,
but this only means that we still have to keep our eyes on the locking.
We're going to do that anyway. -- Joel ]
Signed-off-by: Louis Rilling <louis.rilling@kerlabs.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
2009-01-28 19:18:32 +01:00
# ifdef CONFIG_LOCKDEP
int s_depth ;
# endif
2005-12-15 14:29:43 -08:00
} ;
/*
 * Single-bit flags tested against configfs_dirent.s_type.
 *
 * CONFIGFS_ROOT: release_configfs_dirent() frees nothing for a dirent whose
 * s_type has this bit set.
 * CONFIGFS_DIR: NOTE(review): presumably marks a directory entry - confirm
 * against dir.c.
 */
# define CONFIGFS_ROOT 0x0001
# define CONFIGFS_DIR 0x0002
configfs: config item dependencies.
Sometimes other drivers depend on particular configfs items. For
example, ocfs2 mounts depend on a heartbeat region item. If that
region item is removed with rmdir(2), the ocfs2 mount must BUG or go
readonly. Not happy.
This provides two additional API calls: configfs_depend_item() and
configfs_undepend_item(). A client driver can call
configfs_depend_item() on an existing item to tell configfs that it is
depended on. configfs will then return -EBUSY from rmdir(2) for that
item. When the item is no longer depended on, the client driver calls
configfs_undepend_item() on it.
These APIs cannot be called underneath any configfs callbacks, as
they will conflict. They can block and allocate. A client driver
probably shouldn't be calling them of its own gumption. Rather it should
be providing an API that external subsystems call.
How does this work? Imagine the ocfs2 mount process. When it mounts,
it asks for a heartbeat region item. This is done via a call into the
heartbeat code. Inside the heartbeat code, the region item is looked
up. Here, the heartbeat code calls configfs_depend_item(). If it
succeeds, then heartbeat knows the region is safe to give to ocfs2.
If it fails, it was being torn down anyway, and heartbeat can gracefully
pass up an error.
[ Fixed some bad whitespace in configfs.txt. --Mark ]
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
2007-06-18 18:06:09 -07:00
/*
 * More single-bit flags for configfs_dirent.s_type.
 *
 * CONFIGFS_ITEM_ATTR: also reachable under the name CONFIGFS_NOT_PINNED.
 * CONFIGFS_ITEM_LINK: when set, configfs_get_config_item() treats s_element
 * as a struct configfs_symlink and follows its sl_target.
 * CONFIGFS_USET_*: NOTE(review): meaning is not visible in this header;
 * presumably state bits for user-created directories - confirm against dir.c.
 */
# define CONFIGFS_ITEM_ATTR 0x0004
# define CONFIGFS_ITEM_LINK 0x0020
2005-12-15 14:29:43 -08:00
# define CONFIGFS_USET_DIR 0x0040
# define CONFIGFS_USET_DEFAULT 0x0080
# define CONFIGFS_USET_DROPPING 0x0100
2008-06-16 19:01:02 +02:00
# define CONFIGFS_USET_IN_MKDIR 0x0200
[PATCH] configfs: Prevent userspace from creating new entries under attaching directories
process 1: process 2:
configfs_mkdir("A")
attach_group("A")
attach_item("A")
d_instantiate("A")
populate_groups("A")
mutex_lock("A")
attach_group("A/B")
attach_item("A")
d_instantiate("A/B")
mkdir("A/B/C")
do_path_lookup("A/B/C", LOOKUP_PARENT)
ok
lookup_create("A/B/C")
mutex_lock("A/B")
ok
configfs_mkdir("A/B/C")
ok
attach_group("A/C")
attach_item("A/C")
d_instantiate("A/C")
populate_groups("A/C")
mutex_lock("A/C")
attach_group("A/C/D")
attach_item("A/C/D")
failure
mutex_unlock("A/C")
detach_groups("A/C")
nothing to do
mkdir("A/C/E")
do_path_lookup("A/C/E", LOOKUP_PARENT)
ok
lookup_create("A/C/E")
mutex_lock("A/C")
ok
configfs_mkdir("A/C/E")
ok
detach_item("A/C")
d_delete("A/C")
mutex_unlock("A")
detach_groups("A")
mutex_lock("A/B")
detach_group("A/B")
detach_groups("A/B")
nothing since no _default_ group
detach_item("A/B")
mutex_unlock("A/B")
d_delete("A/B")
detach_item("A")
d_delete("A")
Two bugs:
1/ "A/B/C" and "A/C/E" are created, but never removed while their parent are
removed in the end. The same could happen with symlink() instead of mkdir().
2/ "A" and "A/C" inodes are not locked while detach_item() is called on them,
which may probably confuse VFS.
This commit fixes 1/, tagging new directories with CONFIGFS_USET_CREATING before
building the inode and instantiating the dentry, and validating the whole
group+default groups hierarchy in a second pass by clearing
CONFIGFS_USET_CREATING.
mkdir(), symlink(), lookup(), and dir_open() simply return -ENOENT if
called in (or linking to) a directory tagged with CONFIGFS_USET_CREATING. This
does not prevent userspace from calling stat() successfully on such directories,
but this prevents userspace from adding (children to | symlinking from/to |
read/write attributes of | listing the contents of) not validated items. In
other words, userspace will not interact with the subsystem on a new item until
the new item creation completes correctly.
It was first proposed to re-use CONFIGFS_USET_IN_MKDIR instead of a new
flag CONFIGFS_USET_CREATING, but this generated conflicts when checking the
target of a new symlink: a valid target directory in the middle of attaching
a new user-created child item could be wrongly detected as being attached.
2/ is fixed by next commit.
Signed-off-by: Louis Rilling <louis.rilling@kerlabs.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
2008-07-04 16:56:05 +02:00
/*
 * CONFIGFS_USET_CREATING: s_type bit for a directory that is still being
 * attached. NOTE(review): the code that sets and clears it is not in this
 * header - confirm in dir.c.
 * CONFIGFS_NOT_PINNED: alias that expands to CONFIGFS_ITEM_ATTR.
 */
# define CONFIGFS_USET_CREATING 0x0400
2005-12-15 14:29:43 -08:00
# define CONFIGFS_NOT_PINNED (CONFIGFS_ITEM_ATTR)
2008-06-20 14:09:22 +02:00
extern struct mutex configfs_symlink_mutex ;
2008-06-16 19:00:58 +02:00
extern spinlock_t configfs_dirent_lock ;
2005-12-15 14:29:43 -08:00
extern struct vfsmount * configfs_mount ;
2006-12-06 20:33:20 -08:00
extern struct kmem_cache * configfs_dir_cachep ;
2005-12-15 14:29:43 -08:00
extern int configfs_is_root ( struct config_item * item ) ;
2011-07-25 00:05:26 -04:00
extern struct inode * configfs_new_inode ( umode_t mode , struct configfs_dirent * ) ;
extern int configfs_create ( struct dentry * , umode_t mode , int ( * init ) ( struct inode * ) ) ;
2007-10-16 23:25:46 -07:00
extern int configfs_inode_init ( void ) ;
extern void configfs_inode_exit ( void ) ;
2005-12-15 14:29:43 -08:00
extern int configfs_create_file ( struct config_item * , const struct configfs_attribute * ) ;
extern int configfs_make_dirent ( struct configfs_dirent * ,
struct dentry * , void * , umode_t , int ) ;
[PATCH] configfs: Prevent userspace from creating new entries under attaching directories
process 1: process 2:
configfs_mkdir("A")
attach_group("A")
attach_item("A")
d_instantiate("A")
populate_groups("A")
mutex_lock("A")
attach_group("A/B")
attach_item("A")
d_instantiate("A/B")
mkdir("A/B/C")
do_path_lookup("A/B/C", LOOKUP_PARENT)
ok
lookup_create("A/B/C")
mutex_lock("A/B")
ok
configfs_mkdir("A/B/C")
ok
attach_group("A/C")
attach_item("A/C")
d_instantiate("A/C")
populate_groups("A/C")
mutex_lock("A/C")
attach_group("A/C/D")
attach_item("A/C/D")
failure
mutex_unlock("A/C")
detach_groups("A/C")
nothing to do
mkdir("A/C/E")
do_path_lookup("A/C/E", LOOKUP_PARENT)
ok
lookup_create("A/C/E")
mutex_lock("A/C")
ok
configfs_mkdir("A/C/E")
ok
detach_item("A/C")
d_delete("A/C")
mutex_unlock("A")
detach_groups("A")
mutex_lock("A/B")
detach_group("A/B")
detach_groups("A/B")
nothing since no _default_ group
detach_item("A/B")
mutex_unlock("A/B")
d_delete("A/B")
detach_item("A")
d_delete("A")
Two bugs:
1/ "A/B/C" and "A/C/E" are created, but never removed while their parent are
removed in the end. The same could happen with symlink() instead of mkdir().
2/ "A" and "A/C" inodes are not locked while detach_item() is called on them,
which may probably confuse VFS.
This commit fixes 1/, tagging new directories with CONFIGFS_USET_CREATING before
building the inode and instantiating the dentry, and validating the whole
group+default groups hierarchy in a second pass by clearing
CONFIGFS_USET_CREATING.
mkdir(), symlink(), lookup(), and dir_open() simply return -ENOENT if
called in (or linking to) a directory tagged with CONFIGFS_USET_CREATING. This
does not prevent userspace from calling stat() successfully on such directories,
but this prevents userspace from adding (children to | symlinking from/to |
read/write attributes of | listing the contents of) not validated items. In
other words, userspace will not interact with the subsystem on a new item until
the new item creation completes correctly.
It was first proposed to re-use CONFIGFS_USET_IN_MKDIR instead of a new
flag CONFIGFS_USET_CREATING, but this generated conflicts when checking the
target of a new symlink: a valid target directory in the middle of attaching
a new user-created child item could be wrongly detected as being attached.
2/ is fixed by next commit.
Signed-off-by: Louis Rilling <louis.rilling@kerlabs.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
2008-07-04 16:56:05 +02:00
extern int configfs_dirent_is_ready ( struct configfs_dirent * ) ;
2005-12-15 14:29:43 -08:00
extern int configfs_add_file ( struct dentry * , const struct configfs_attribute * , int ) ;
extern void configfs_hash_and_remove ( struct dentry * dir , const char * name ) ;
extern const unsigned char * configfs_get_name ( struct configfs_dirent * sd ) ;
extern void configfs_drop_dentry ( struct configfs_dirent * sd , struct dentry * parent ) ;
2006-01-25 13:31:07 -08:00
extern int configfs_setattr ( struct dentry * dentry , struct iattr * iattr ) ;
2005-12-15 14:29:43 -08:00
extern int configfs_pin_fs ( void ) ;
extern void configfs_release_fs ( void ) ;
extern struct rw_semaphore configfs_rename_sem ;
extern struct super_block * configfs_sb ;
2006-03-28 01:56:42 -08:00
extern const struct file_operations configfs_dir_operations ;
extern const struct file_operations configfs_file_operations ;
extern const struct file_operations bin_fops ;
2007-02-12 00:55:38 -08:00
extern const struct inode_operations configfs_dir_inode_operations ;
extern const struct inode_operations configfs_symlink_inode_operations ;
2011-01-12 16:41:05 -05:00
extern const struct dentry_operations configfs_dentry_ops ;
2005-12-15 14:29:43 -08:00
extern int configfs_symlink ( struct inode * dir , struct dentry * dentry ,
const char * symname ) ;
extern int configfs_unlink ( struct inode * dir , struct dentry * dentry ) ;
/*
 * struct configfs_symlink - payload stored in configfs_dirent.s_element for
 * dirents whose s_type has CONFIGFS_ITEM_LINK set.
 */
struct configfs_symlink {
	/* List linkage. NOTE(review): which list this joins is not visible here. */
	struct list_head	sl_list;
	/* Item the link refers to; configfs_get_config_item() hands it to config_item_get(). */
	struct config_item	*sl_target;
};
extern int configfs_create_link ( struct configfs_symlink * sl ,
struct dentry * parent ,
struct dentry * dentry ) ;
static inline struct config_item * to_item ( struct dentry * dentry )
{
struct configfs_dirent * sd = dentry - > d_fsdata ;
return ( ( struct config_item * ) sd - > s_element ) ;
}
static inline struct configfs_attribute * to_attr ( struct dentry * dentry )
{
struct configfs_dirent * sd = dentry - > d_fsdata ;
return ( ( struct configfs_attribute * ) sd - > s_element ) ;
}
/*
 * configfs_get_config_item - look up the config_item behind a dentry.
 * @dentry: dentry to inspect; its d_fsdata is read as a struct configfs_dirent.
 *
 * Runs with dentry->d_lock held for the whole lookup. If the dentry is
 * unhashed, NULL is returned. Otherwise the result is whatever
 * config_item_get() returns for either the symlink target (when the dirent
 * has CONFIGFS_ITEM_LINK set in s_type) or the dirent's own s_element.
 */
static inline struct config_item * configfs_get_config_item ( struct dentry * dentry )
{
struct config_item * item = NULL ;
2011-01-07 17:49:33 +11:00
spin_lock ( & dentry - > d_lock ) ;
2005-12-15 14:29:43 -08:00
if ( ! d_unhashed ( dentry ) ) {
struct configfs_dirent * sd = dentry - > d_fsdata ;
/* A link dirent stores a configfs_symlink, so go through sl_target. */
if ( sd - > s_type & CONFIGFS_ITEM_LINK ) {
struct configfs_symlink * sl = sd - > s_element ;
item = config_item_get ( sl - > sl_target ) ;
} else
item = config_item_get ( sd - > s_element ) ;
}
2011-01-07 17:49:33 +11:00
spin_unlock ( & dentry - > d_lock ) ;
2005-12-15 14:29:43 -08:00
return item ;
}
/*
 * release_configfs_dirent - free a configfs_dirent and its s_iattr.
 * @sd: dirent to free; dereferenced unconditionally.
 *
 * Called by configfs_put() when s_count drops to zero. A dirent whose s_type
 * has CONFIGFS_ROOT set is left untouched. For any other dirent, s_iattr is
 * passed to kfree() and the dirent is returned to configfs_dir_cachep.
 */
static inline void release_configfs_dirent ( struct configfs_dirent * sd )
{
2006-01-25 13:31:07 -08:00
if ( ! ( sd - > s_type & CONFIGFS_ROOT ) ) {
kfree ( sd - > s_iattr ) ;
kmem_cache_free ( configfs_dir_cachep , sd ) ;
}
2005-12-15 14:29:43 -08:00
}
static inline struct configfs_dirent * configfs_get ( struct configfs_dirent * sd )
{
if ( sd ) {
WARN_ON ( ! atomic_read ( & sd - > s_count ) ) ;
atomic_inc ( & sd - > s_count ) ;
}
return sd ;
}
static inline void configfs_put ( struct configfs_dirent * sd )
{
WARN_ON ( ! atomic_read ( & sd - > s_count ) ) ;
if ( atomic_dec_and_test ( & sd - > s_count ) )
release_configfs_dirent ( sd ) ;
}