2019-05-20 20:08:01 +03:00
// SPDX-License-Identifier: GPL-2.0-or-later
2018-06-15 17:19:22 +03:00
/* AFS dynamic root handling
2018-04-06 16:17:25 +03:00
 *
 * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */
# include <linux/fs.h>
# include <linux/namei.h>
# include <linux/dns_resolver.h>
# include "internal.h"
afs: Build an abstraction around an "operation" concept
Turn the afs_operation struct into the main way that most fileserver
operations are managed. Various things are added to the struct, including
the following:
(1) All the parameters and results of the relevant operations are moved
into it, removing corresponding fields from the afs_call struct.
afs_call gets a pointer to the op.
(2) The target volume is made the main focus of the operation, rather than
the target vnode(s), and a bunch of op->vnode->volume are made
op->volume instead.
(3) Two vnode records are defined (op->file[]) for the vnode(s) involved
in most operations. The vnode record (struct afs_vnode_param)
contains:
- The vnode pointer.
- The fid of the vnode to be included in the parameters or that was
returned in the reply (eg. FS.MakeDir).
- The status and callback information that may be returned in the
reply about the vnode.
- Callback break and data version tracking for detecting
simultaneous third-party changes.
(4) Pointers to dentries to be updated with new inodes.
(5) An operations table pointer. The table includes pointers to functions
for issuing AFS and YFS-variant RPCs, handling the success and abort
of an operation and handling post-I/O-lock local editing of a
directory.
To make this work, the following function restructuring is made:
(A) The rotation loop that issues calls to fileservers that can be found
in each function that wants to issue an RPC (such as afs_mkdir()) is
extracted out into common code, in a new file called fs_operation.c.
(B) The rotation loops, such as the one in afs_mkdir(), are replaced with
a much smaller piece of code that allocates an operation, sets the
parameters and then calls out to the common code to do the actual
work.
(C) The code for handling the success and failure of an operation is
moved into operation functions (as (5) above) and these are called
from the core code at appropriate times.
(D) The pseudo inode getting stuff used by the dynamic root code is moved
over into dynroot.c.
(E) struct afs_iget_data is absorbed into the operation struct and
afs_iget() expects to be given an op pointer and a vnode record.
(F) Point (E) doesn't work for the root dir of a volume, but we know the
FID in advance (it's always vnode 1, unique 1), so a separate inode
getter, afs_root_iget(), is provided to special-case that.
(G) The inode status init/update functions now also take an op and a vnode
record.
(H) The RPC marshalling functions now, for the most part, just take an
afs_operation struct as their only argument. All the data they need
is held there. The result delivery functions write their answers
there as well.
(I) The call is attached to the operation and then the operation core does
the waiting.
And then the new operation code is, for the moment, made to just initialise
the operation, get the appropriate vnode I/O locks and do the same rotation
loop as before.
This lays the foundation for the following changes in the future:
(*) Overhauling the rotation (again).
(*) Support for asynchronous I/O, where the fileserver rotation must be
done asynchronously also.
Signed-off-by: David Howells <dhowells@redhat.com>
2020-04-10 22:51:51 +03:00
/* Counter from which afs_iget_pseudo_dir() draws the vnode number of each
 * non-root pseudo directory it creates.
 */
static atomic_t afs_autocell_ino ;
/*
 * iget5_locked() match callback for the pseudo inodes created here.
 *
 * Unconditionally reports "no match" (0); neither argument is examined.
 */
static int afs_iget5_pseudo_test(struct inode *inode, void *opaque)
{
	return 0;
}
/*
* iget5 ( ) inode initialiser
*/
static int afs_iget5_pseudo_set ( struct inode * inode , void * opaque )
{
struct afs_super_info * as = AFS_FS_S ( inode - > i_sb ) ;
struct afs_vnode * vnode = AFS_FS_I ( inode ) ;
struct afs_fid * fid = opaque ;
vnode - > volume = as - > volume ;
vnode - > fid = * fid ;
inode - > i_ino = fid - > vnode ;
inode - > i_generation = fid - > unique ;
return 0 ;
}
/*
 * Build the inode for a pseudo directory: either the dynamic root itself
 * (@root true) or an autocell automount directory (@root false).
 *
 * Returns the initialised inode after unlock_new_inode(), or
 * ERR_PTR(-ENOMEM) if iget5_locked() could not provide one.
 */
struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root)
{
	struct afs_super_info *as = AFS_FS_S(sb);
	struct afs_vnode *vnode;
	struct inode *inode;
	struct afs_fid fid = {};

	_enter("");

	if (as->volume)
		fid.vid = as->volume->vid;

	/* The root uses the fixed fid 1:1; any other pseudo dir takes the next
	 * number from the autocell counter and a zero uniquifier.
	 */
	fid.vnode = root ? 1 : atomic_inc_return(&afs_autocell_ino);
	fid.unique = root ? 1 : 0;

	inode = iget5_locked(sb, fid.vnode,
			     afs_iget5_pseudo_test, afs_iget5_pseudo_set, &fid);
	if (!inode) {
		_leave(" = -ENOMEM");
		return ERR_PTR(-ENOMEM);
	}

	_debug("GOT INODE %p { ino=%lu, vl=%llx, vn=%llx, u=%x }",
	       inode, inode->i_ino, fid.vid, fid.vnode, fid.unique);

	vnode = AFS_FS_I(inode);

	/* The test callback never matches, so this has to be a new inode. */
	BUG_ON(!(inode->i_state & I_NEW));

	netfs_inode_init(&vnode->netfs, NULL);

	/* Fixed attributes: an empty, root-owned, read/search-only directory. */
	inode->i_size = 0;
	inode->i_blocks = 0;
	inode->i_generation = 0;
	inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
	set_nlink(inode, 2);
	inode->i_uid = GLOBAL_ROOT_UID;
	inode->i_gid = GLOBAL_ROOT_GID;
	inode->i_ctime = inode->i_atime = inode->i_mtime = current_time(inode);

	set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags);
	if (root) {
		inode->i_op = &afs_dynroot_inode_operations;
		inode->i_fop = &simple_dir_operations;
	} else {
		/* Non-root pseudo dirs are automount points. */
		inode->i_op = &afs_autocell_inode_operations;
		set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags);
		inode->i_flags |= S_AUTOMOUNT;
	}

	inode->i_flags |= S_NOATIME;
	unlock_new_inode(inode);
	_leave(" = %p", inode);
	return inode;
}
2018-04-06 16:17:25 +03:00
/*
* Probe to see if a cell may exist . This prevents positive dentries from
* being created unnecessarily .
*/
static int afs_probe_cell_name ( struct dentry * dentry )
{
struct afs_cell * cell ;
2019-06-26 23:02:33 +03:00
struct afs_net * net = afs_d2net ( dentry ) ;
2018-04-06 16:17:25 +03:00
const char * name = dentry - > d_name . name ;
size_t len = dentry - > d_name . len ;
int ret ;
/* Names prefixed with a dot are R/W mounts. */
if ( name [ 0 ] = = ' . ' ) {
if ( len = = 1 )
return - EINVAL ;
name + + ;
len - - ;
}
2020-10-13 22:51:59 +03:00
cell = afs_find_cell ( net , name , len , afs_cell_trace_use_probe ) ;
2018-04-06 16:17:25 +03:00
if ( ! IS_ERR ( cell ) ) {
2020-10-13 22:51:59 +03:00
afs_unuse_cell ( net , cell , afs_cell_trace_unuse_probe ) ;
2018-04-06 16:17:25 +03:00
return 0 ;
}
2019-06-26 23:02:33 +03:00
ret = dns_query ( net - > net , " afsdb " , name , len , " srv=1 " ,
NULL , NULL , false ) ;
2018-04-06 16:17:25 +03:00
if ( ret = = - ENODATA )
ret = - EDESTADDRREQ ;
return ret ;
}
/*
* Try to auto mount the mountpoint with pseudo directory , if the autocell
* operation is setted .
*/
struct inode * afs_try_auto_mntpt ( struct dentry * dentry , struct inode * dir )
{
struct afs_vnode * vnode = AFS_FS_I ( dir ) ;
struct inode * inode ;
int ret = - ENOENT ;
2018-10-20 02:57:57 +03:00
_enter ( " %p{%pd}, {%llx:%llu} " ,
2018-04-06 16:17:25 +03:00
dentry , dentry , vnode - > fid . vid , vnode - > fid . vnode ) ;
if ( ! test_bit ( AFS_VNODE_AUTOCELL , & vnode - > flags ) )
goto out ;
ret = afs_probe_cell_name ( dentry ) ;
if ( ret < 0 )
goto out ;
inode = afs_iget_pseudo_dir ( dir - > i_sb , false ) ;
if ( IS_ERR ( inode ) ) {
ret = PTR_ERR ( inode ) ;
goto out ;
}
_leave ( " = %p " , inode ) ;
return inode ;
out :
_leave ( " = %d " , ret ) ;
2018-06-24 17:45:44 +03:00
return ret = = - ENOENT ? NULL : ERR_PTR ( ret ) ;
2018-04-06 16:17:25 +03:00
}
/*
* Look up @ cell in a dynroot directory . This is a substitution for the
* local cell name for the net namespace .
*/
static struct dentry * afs_lookup_atcell ( struct dentry * dentry )
{
struct afs_cell * cell ;
struct afs_net * net = afs_d2net ( dentry ) ;
struct dentry * ret ;
char * name ;
int len ;
if ( ! net - > ws_cell )
return ERR_PTR ( - ENOENT ) ;
ret = ERR_PTR ( - ENOMEM ) ;
name = kmalloc ( AFS_MAXCELLNAME + 1 , GFP_KERNEL ) ;
if ( ! name )
goto out_p ;
afs: Fix rapid cell addition/removal by not using RCU on cells tree
There are a number of problems that are being seen by the rapidly mounting
and unmounting an afs dynamic root with an explicit cell and volume
specified (which should probably be rejected, but that's a separate issue):
What the tests are doing is to look up/create a cell record for the name
given and then tear it down again without actually using it to try to talk
to a server. This is repeated endlessly, very fast, and the new cell
collides with the old one if it's not quick enough to reuse it.
It appears (as suggested by Hillf Danton) that the search through the RB
tree under a read_seqbegin_or_lock() under RCU conditions isn't safe and
that it's not blocking the write_seqlock(), despite taking two passes at
it. He suggested that the code should take a ref on the cell it's
attempting to look at - but this shouldn't be necessary until we've
compared the cell names. It's possible that I'm missing a barrier
somewhere.
However, using an RCU search for this is overkill, really - we only need to
access the cell name in a few places, and they're places where we're may
end up sleeping anyway.
Fix this by switching to an R/W semaphore instead.
Additionally, draw the down_read() call inside the function (renamed to
afs_find_cell()) since all the callers were taking the RCU read lock (or
should've been[*]).
[*] afs_probe_cell_name() should have been, but that doesn't appear to be
involved in the bug reports.
The symptoms of this look like:
general protection fault, probably for non-canonical address 0xf27d208691691fdb: 0000 [#1] PREEMPT SMP KASAN
KASAN: maybe wild-memory-access in range [0x93e924348b48fed8-0x93e924348b48fedf]
...
RIP: 0010:strncasecmp lib/string.c:52 [inline]
RIP: 0010:strncasecmp+0x5f/0x240 lib/string.c:43
afs_lookup_cell_rcu+0x313/0x720 fs/afs/cell.c:88
afs_lookup_cell+0x2ee/0x1440 fs/afs/cell.c:249
afs_parse_source fs/afs/super.c:290 [inline]
...
Fixes: 989782dcdc91 ("afs: Overhaul cell database management")
Reported-by: syzbot+459a5dce0b4cb70fd076@syzkaller.appspotmail.com
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Hillf Danton <hdanton@sina.com>
cc: syzkaller-bugs@googlegroups.com
2020-10-09 16:11:58 +03:00
down_read ( & net - > cells_lock ) ;
cell = net - > ws_cell ;
if ( cell ) {
len = cell - > name_len ;
memcpy ( name , cell - > name , len + 1 ) ;
}
up_read ( & net - > cells_lock ) ;
2018-04-06 16:17:25 +03:00
ret = ERR_PTR ( - ENOENT ) ;
if ( ! cell )
goto out_n ;
ret = lookup_one_len ( name , dentry - > d_parent , len ) ;
/* We don't want to d_add() the @cell dentry here as we don't want to
* the cached dentry to hide changes to the local cell name .
*/
out_n :
kfree ( name ) ;
out_p :
return ret ;
}
/*
* Look up an entry in a dynroot directory .
*/
static struct dentry * afs_dynroot_lookup ( struct inode * dir , struct dentry * dentry ,
unsigned int flags )
{
_enter ( " %pd " , dentry ) ;
ASSERTCMP ( d_inode ( dentry ) , = = , NULL ) ;
2019-12-11 11:56:04 +03:00
if ( flags & LOOKUP_CREATE )
return ERR_PTR ( - EOPNOTSUPP ) ;
2018-04-06 16:17:25 +03:00
if ( dentry - > d_name . len > = AFSNAMEMAX ) {
_leave ( " = -ENAMETOOLONG " ) ;
return ERR_PTR ( - ENAMETOOLONG ) ;
}
if ( dentry - > d_name . len = = 5 & &
memcmp ( dentry - > d_name . name , " @cell " , 5 ) = = 0 )
return afs_lookup_atcell ( dentry ) ;
2018-06-24 17:45:44 +03:00
return d_splice_alias ( afs_try_auto_mntpt ( dentry , dir ) , dentry ) ;
2018-04-06 16:17:25 +03:00
}
/* Inode operations for the dynamic root directory: lookup only. */
const struct inode_operations afs_dynroot_inode_operations = {
	.lookup		= afs_dynroot_lookup,
};
/*
 * ->d_revalidate() for the dynamic root: directories here never need
 * revalidating, so every dentry is reported as still valid (1).
 */
static int afs_dynroot_d_revalidate(struct dentry *dentry, unsigned int flags)
{
	return 1;
}
/*
 * ->d_delete() for the dynamic root: tell the VFS whether a dentry should be
 * unhashed (mustn't sleep).
 * - called from dput() when d_count is going to 0.
 * - returns 1 (unhash) for a positive dentry, 0 otherwise.
 */
static int afs_dynroot_d_delete(const struct dentry *dentry)
{
	if (d_really_is_positive(dentry))
		return 1;
	return 0;
}
/* Dentry operations used for dentries in the dynamic root. */
const struct dentry_operations afs_dynroot_dentry_operations = {
	.d_revalidate	= afs_dynroot_d_revalidate,
	.d_delete	= afs_dynroot_d_delete,
	.d_release	= afs_d_release,
	.d_automount	= afs_d_automount,
};
2018-06-15 17:19:22 +03:00
/*
* Create a manually added cell mount directory .
* - The caller must hold net - > proc_cells_lock
*/
int afs_dynroot_mkdir ( struct afs_net * net , struct afs_cell * cell )
{
struct super_block * sb = net - > dynroot_sb ;
struct dentry * root , * subdir ;
int ret ;
if ( ! sb | | atomic_read ( & sb - > s_active ) = = 0 )
return 0 ;
/* Let the ->lookup op do the creation */
root = sb - > s_root ;
inode_lock ( root - > d_inode ) ;
subdir = lookup_one_len ( cell - > name , root , cell - > name_len ) ;
if ( IS_ERR ( subdir ) ) {
ret = PTR_ERR ( subdir ) ;
goto unlock ;
}
/* Note that we're retaining an extra ref on the dentry */
subdir - > d_fsdata = ( void * ) 1UL ;
ret = 0 ;
unlock :
inode_unlock ( root - > d_inode ) ;
return ret ;
}
/*
* Remove a manually added cell mount directory .
* - The caller must hold net - > proc_cells_lock
*/
void afs_dynroot_rmdir ( struct afs_net * net , struct afs_cell * cell )
{
struct super_block * sb = net - > dynroot_sb ;
struct dentry * root , * subdir ;
if ( ! sb | | atomic_read ( & sb - > s_active ) = = 0 )
return ;
root = sb - > s_root ;
inode_lock ( root - > d_inode ) ;
/* Don't want to trigger a lookup call, which will re-add the cell */
subdir = try_lookup_one_len ( cell - > name , root , cell - > name_len ) ;
if ( IS_ERR_OR_NULL ( subdir ) ) {
_debug ( " lookup %ld " , PTR_ERR ( subdir ) ) ;
goto no_dentry ;
}
_debug ( " rmdir %pd %u " , subdir , d_count ( subdir ) ) ;
if ( subdir - > d_fsdata ) {
_debug ( " unpin %u " , d_count ( subdir ) ) ;
subdir - > d_fsdata = NULL ;
dput ( subdir ) ;
}
dput ( subdir ) ;
no_dentry :
inode_unlock ( root - > d_inode ) ;
_leave ( " " ) ;
}
/*
* Populate a newly created dynamic root with cell names .
*/
int afs_dynroot_populate ( struct super_block * sb )
{
struct afs_cell * cell ;
struct afs_net * net = afs_sb2net ( sb ) ;
int ret ;
2019-05-09 11:17:08 +03:00
mutex_lock ( & net - > proc_cells_lock ) ;
2018-06-15 17:19:22 +03:00
net - > dynroot_sb = sb ;
2018-10-12 00:45:49 +03:00
hlist_for_each_entry ( cell , & net - > proc_cells , proc_link ) {
2018-06-15 17:19:22 +03:00
ret = afs_dynroot_mkdir ( net , cell ) ;
if ( ret < 0 )
goto error ;
}
ret = 0 ;
out :
mutex_unlock ( & net - > proc_cells_lock ) ;
return ret ;
error :
net - > dynroot_sb = NULL ;
goto out ;
}
/*
* When a dynamic root that ' s in the process of being destroyed , depopulate it
* of pinned directories .
*/
void afs_dynroot_depopulate ( struct super_block * sb )
{
struct afs_net * net = afs_sb2net ( sb ) ;
struct dentry * root = sb - > s_root , * subdir , * tmp ;
/* Prevent more subdirs from being created */
mutex_lock ( & net - > proc_cells_lock ) ;
if ( net - > dynroot_sb = = sb )
net - > dynroot_sb = NULL ;
mutex_unlock ( & net - > proc_cells_lock ) ;
2020-08-21 12:15:12 +03:00
if ( root ) {
inode_lock ( root - > d_inode ) ;
/* Remove all the pins for dirs created for manually added cells */
list_for_each_entry_safe ( subdir , tmp , & root - > d_subdirs , d_child ) {
if ( subdir - > d_fsdata ) {
subdir - > d_fsdata = NULL ;
dput ( subdir ) ;
}
2018-06-15 17:19:22 +03:00
}
2020-08-21 12:15:12 +03:00
inode_unlock ( root - > d_inode ) ;
}
2018-06-15 17:19:22 +03:00
}