2010-10-20 08:18:04 +04:00
/*
* Device operations for the pnfs nfs4 file layout driver .
*
* Copyright ( c ) 2002
* The Regents of the University of Michigan
* All Rights Reserved
*
* Dean Hildebrand < dhildebz @ umich . edu >
* Garth Goodson < Garth . Goodson @ netapp . com >
*
* Permission is granted to use , copy , create derivative works , and
* redistribute this software and such derivative works for any purpose ,
* so long as the name of the University of Michigan is not used in
* any advertising or publicity pertaining to the use or distribution
* of this software without specific , written prior authorization . If
* the above copyright notice or any other identification of the
* University of Michigan is included in any copy of any portion of
* this software , then the disclaimer below must also be included .
*
* This software is provided as is , without representation or warranty
* of any kind either express or implied , including without limitation
* the implied warranties of merchantability , fitness for a particular
* purpose , or noninfringement . The Regents of the University of
* Michigan shall not be liable for any damages , including special ,
* indirect , incidental , or consequential damages , with respect to any
* claim arising out of or in connection with the use of the software ,
* even if it has been or is hereafter advised of the possibility of
* such damages .
*/
# include <linux/nfs_fs.h>
# include <linux/vmalloc.h>
# include "internal.h"
# include "nfs4filelayout.h"
# define NFSDBG_FACILITY NFSDBG_PNFS_LD
/*
* Data server cache
*
* Data servers can be mapped to different device ids .
* nfs4_pnfs_ds reference counting
* - set to 1 on allocation
* - incremented when a device id maps a data server already in the cache .
* - decremented when deviceid is removed from the cache .
*/
/*
 * Taken around every lookup, insertion and removal on
 * nfs4_data_server_cache in this file; also held while a device id is
 * flagged NFS4_DEVICE_ID_NEG_ENTRY.
 */
DEFINE_SPINLOCK ( nfs4_ds_cache_lock ) ;
/* Cache of known data servers; entries are linked through ds_node. */
static LIST_HEAD ( nfs4_data_server_cache ) ;
/* Debug routines */
void
print_ds ( struct nfs4_pnfs_ds * ds )
{
if ( ds = = NULL ) {
printk ( " %s NULL device \n " , __func__ ) ;
return ;
}
2011-06-01 02:48:56 +04:00
printk ( " ds %s \n "
2010-10-20 08:18:04 +04:00
" ref count %d \n "
" client %p \n "
" cl_exchange_flags %x \n " ,
2011-06-01 02:48:56 +04:00
ds - > ds_remotestr ,
2010-10-20 08:18:04 +04:00
atomic_read ( & ds - > ds_count ) , ds - > ds_clp ,
ds - > ds_clp ? ds - > ds_clp - > cl_exchange_flags : 0 ) ;
}
2011-06-01 02:48:57 +04:00
static bool
same_sockaddr ( struct sockaddr * addr1 , struct sockaddr * addr2 )
2010-10-20 08:18:04 +04:00
{
2011-06-01 02:48:56 +04:00
struct sockaddr_in * a , * b ;
struct sockaddr_in6 * a6 , * b6 ;
2010-10-20 08:18:04 +04:00
2011-06-01 02:48:57 +04:00
if ( addr1 - > sa_family ! = addr2 - > sa_family )
return false ;
switch ( addr1 - > sa_family ) {
case AF_INET :
a = ( struct sockaddr_in * ) addr1 ;
b = ( struct sockaddr_in * ) addr2 ;
if ( a - > sin_addr . s_addr = = b - > sin_addr . s_addr & &
a - > sin_port = = b - > sin_port )
return true ;
break ;
case AF_INET6 :
a6 = ( struct sockaddr_in6 * ) addr1 ;
b6 = ( struct sockaddr_in6 * ) addr2 ;
/* LINKLOCAL addresses must have matching scope_id */
if ( ipv6_addr_scope ( & a6 - > sin6_addr ) = =
IPV6_ADDR_SCOPE_LINKLOCAL & &
a6 - > sin6_scope_id ! = b6 - > sin6_scope_id )
return false ;
if ( ipv6_addr_equal ( & a6 - > sin6_addr , & b6 - > sin6_addr ) & &
a6 - > sin6_port = = b6 - > sin6_port )
return true ;
break ;
default :
dprintk ( " %s: unhandled address family: %u \n " ,
__func__ , addr1 - > sa_family ) ;
return false ;
}
return false ;
}
/*
 * Look up a cached data server by address.
 *
 * Walks @dsaddrs in order and, for each address, scans every cached
 * data server's address list; the first server owning a matching
 * address is returned.  Returns NULL when nothing matches.
 *
 * nfs4_ds_cache_lock is held by the caller.
 */
static struct nfs4_pnfs_ds *
_data_server_lookup_locked(struct list_head *dsaddrs)
{
	struct nfs4_pnfs_ds *cached;
	struct nfs4_pnfs_ds_addr *wanted, *known;

	list_for_each_entry(wanted, dsaddrs, da_node) {
		struct sockaddr *want_sa = (struct sockaddr *)&wanted->da_addr;

		list_for_each_entry(cached, &nfs4_data_server_cache, ds_node) {
			list_for_each_entry(known, &cached->ds_addrs, da_node) {
				struct sockaddr *known_sa =
					(struct sockaddr *)&known->da_addr;

				if (same_sockaddr(want_sa, known_sa))
					return cached;
			}
		}
	}
	return NULL;
}
2011-06-01 02:48:57 +04:00
/*
* Compare two lists of addresses .
*/
static bool
_data_server_match_all_addrs_locked ( struct list_head * dsaddrs1 ,
struct list_head * dsaddrs2 )
{
struct nfs4_pnfs_ds_addr * da1 , * da2 ;
size_t count1 = 0 ,
count2 = 0 ;
list_for_each_entry ( da1 , dsaddrs1 , da_node )
count1 + + ;
list_for_each_entry ( da2 , dsaddrs2 , da_node ) {
bool found = false ;
count2 + + ;
list_for_each_entry ( da1 , dsaddrs1 , da_node ) {
if ( same_sockaddr ( ( struct sockaddr * ) & da1 - > da_addr ,
( struct sockaddr * ) & da2 - > da_addr ) ) {
found = true ;
break ;
}
}
if ( ! found )
return false ;
}
return ( count1 = = count2 ) ;
}
2011-03-01 04:34:17 +03:00
/*
* Create an rpc connection to the nfs4_pnfs_ds data server
2011-06-02 00:32:21 +04:00
* Currently only supports IPv4 and IPv6 addresses
2011-03-01 04:34:17 +03:00
*/
static int
nfs4_ds_connect ( struct nfs_server * mds_srv , struct nfs4_pnfs_ds * ds )
{
2011-06-01 02:48:58 +04:00
struct nfs_client * clp = ERR_PTR ( - EIO ) ;
2011-06-01 02:48:57 +04:00
struct nfs4_pnfs_ds_addr * da ;
2011-03-01 04:34:17 +03:00
int status = 0 ;
2011-06-01 02:48:57 +04:00
dprintk ( " --> %s DS %s au_flavor %d \n " , __func__ , ds - > ds_remotestr ,
2011-03-01 04:34:17 +03:00
mds_srv - > nfs_client - > cl_rpcclient - > cl_auth - > au_flavor ) ;
2011-06-01 02:48:57 +04:00
BUG_ON ( list_empty ( & ds - > ds_addrs ) ) ;
2011-06-01 02:48:58 +04:00
list_for_each_entry ( da , & ds - > ds_addrs , da_node ) {
dprintk ( " %s: DS %s: trying address %s \n " ,
__func__ , ds - > ds_remotestr , da - > da_remotestr ) ;
2011-06-01 02:48:57 +04:00
2011-06-01 02:48:58 +04:00
clp = nfs4_set_ds_client ( mds_srv - > nfs_client ,
2011-06-01 02:48:57 +04:00
( struct sockaddr * ) & da - > da_addr ,
da - > da_addrlen , IPPROTO_TCP ) ;
2011-06-01 02:48:58 +04:00
if ( ! IS_ERR ( clp ) )
break ;
}
2011-03-01 04:34:17 +03:00
if ( IS_ERR ( clp ) ) {
status = PTR_ERR ( clp ) ;
goto out ;
}
if ( ( clp - > cl_exchange_flags & EXCHGID4_FLAG_MASK_PNFS ) ! = 0 ) {
if ( ! is_ds_client ( clp ) ) {
status = - ENODEV ;
goto out_put ;
}
ds - > ds_clp = clp ;
2011-06-01 02:48:56 +04:00
dprintk ( " %s [existing] server=%s \n " , __func__ ,
ds - > ds_remotestr ) ;
2011-03-01 04:34:17 +03:00
goto out ;
}
/*
* Do not set NFS_CS_CHECK_LEASE_TIME instead set the DS lease to
* be equal to the MDS lease . Renewal is scheduled in create_session .
*/
spin_lock ( & mds_srv - > nfs_client - > cl_lock ) ;
clp - > cl_lease_time = mds_srv - > nfs_client - > cl_lease_time ;
spin_unlock ( & mds_srv - > nfs_client - > cl_lock ) ;
clp - > cl_last_renewal = jiffies ;
/* New nfs_client */
status = nfs4_init_ds_session ( clp ) ;
if ( status )
goto out_put ;
ds - > ds_clp = clp ;
2011-06-01 02:48:56 +04:00
dprintk ( " %s [new] addr: %s \n " , __func__ , ds - > ds_remotestr ) ;
2011-03-01 04:34:17 +03:00
out :
return status ;
out_put :
nfs_put_client ( clp ) ;
goto out ;
}
2010-10-20 08:18:04 +04:00
static void
destroy_ds ( struct nfs4_pnfs_ds * ds )
{
2011-06-01 02:48:57 +04:00
struct nfs4_pnfs_ds_addr * da ;
2010-10-20 08:18:04 +04:00
dprintk ( " --> %s \n " , __func__ ) ;
ifdebug ( FACILITY )
print_ds ( ds ) ;
if ( ds - > ds_clp )
nfs_put_client ( ds - > ds_clp ) ;
2011-06-01 02:48:57 +04:00
while ( ! list_empty ( & ds - > ds_addrs ) ) {
da = list_first_entry ( & ds - > ds_addrs ,
struct nfs4_pnfs_ds_addr ,
da_node ) ;
list_del_init ( & da - > da_node ) ;
kfree ( da - > da_remotestr ) ;
kfree ( da ) ;
}
2011-06-01 02:48:56 +04:00
kfree ( ds - > ds_remotestr ) ;
2010-10-20 08:18:04 +04:00
kfree ( ds ) ;
}
2011-05-20 15:47:33 +04:00
void
2010-10-20 08:18:04 +04:00
nfs4_fl_free_deviceid ( struct nfs4_file_layout_dsaddr * dsaddr )
{
struct nfs4_pnfs_ds * ds ;
int i ;
2011-05-20 06:14:47 +04:00
nfs4_print_deviceid ( & dsaddr - > id_node . deviceid ) ;
2010-10-20 08:18:04 +04:00
for ( i = 0 ; i < dsaddr - > ds_num ; i + + ) {
ds = dsaddr - > ds_list [ i ] ;
if ( ds ! = NULL ) {
if ( atomic_dec_and_lock ( & ds - > ds_count ,
& nfs4_ds_cache_lock ) ) {
list_del_init ( & ds - > ds_node ) ;
spin_unlock ( & nfs4_ds_cache_lock ) ;
destroy_ds ( ds ) ;
}
}
}
kfree ( dsaddr - > stripe_indices ) ;
kfree ( dsaddr ) ;
}
2011-06-01 02:48:56 +04:00
/*
* Create a string with a human readable address and port to avoid
* complicated setup around many dprinks .
*/
static char *
2011-06-01 02:48:57 +04:00
nfs4_pnfs_remotestr ( struct list_head * dsaddrs , gfp_t gfp_flags )
2011-06-01 02:48:56 +04:00
{
2011-06-01 02:48:57 +04:00
struct nfs4_pnfs_ds_addr * da ;
2011-06-01 02:48:56 +04:00
char * remotestr ;
size_t len ;
2011-06-01 02:48:57 +04:00
char * p ;
2011-06-01 02:48:56 +04:00
2011-06-01 02:48:57 +04:00
len = 3 ; /* '{', '}' and eol */
list_for_each_entry ( da , dsaddrs , da_node ) {
len + = strlen ( da - > da_remotestr ) + 1 ; /* string plus comma */
2011-06-01 02:48:56 +04:00
}
2011-06-01 02:48:57 +04:00
remotestr = kzalloc ( len , gfp_flags ) ;
if ( ! remotestr )
2011-06-01 02:48:56 +04:00
return NULL ;
2011-06-01 02:48:57 +04:00
p = remotestr ;
* ( p + + ) = ' { ' ;
len - - ;
list_for_each_entry ( da , dsaddrs , da_node ) {
size_t ll = strlen ( da - > da_remotestr ) ;
2011-06-01 02:48:56 +04:00
2011-06-01 02:48:57 +04:00
if ( ll > len )
goto out_err ;
2011-06-01 02:48:56 +04:00
2011-06-01 02:48:57 +04:00
memcpy ( p , da - > da_remotestr , ll ) ;
p + = ll ;
len - = ll ;
2011-06-01 02:48:56 +04:00
2011-06-01 02:48:57 +04:00
if ( len < 1 )
goto out_err ;
( * p + + ) = ' , ' ;
len - - ;
}
if ( len < 2 )
goto out_err ;
* ( p + + ) = ' } ' ;
* p = ' \0 ' ;
2011-06-01 02:48:56 +04:00
return remotestr ;
2011-06-01 02:48:57 +04:00
out_err :
kfree ( remotestr ) ;
return NULL ;
2011-06-01 02:48:56 +04:00
}
2010-10-20 08:18:04 +04:00
static struct nfs4_pnfs_ds *
2011-06-01 02:48:57 +04:00
nfs4_pnfs_ds_add ( struct list_head * dsaddrs , gfp_t gfp_flags )
2010-10-20 08:18:04 +04:00
{
2011-06-01 02:48:56 +04:00
struct nfs4_pnfs_ds * tmp_ds , * ds = NULL ;
char * remotestr ;
2010-10-20 08:18:04 +04:00
2011-06-01 02:48:57 +04:00
if ( list_empty ( dsaddrs ) ) {
dprintk ( " %s: no addresses defined \n " , __func__ ) ;
goto out ;
}
ds = kzalloc ( sizeof ( * ds ) , gfp_flags ) ;
2010-10-20 08:18:04 +04:00
if ( ! ds )
goto out ;
2011-06-01 02:48:56 +04:00
/* this is only used for debugging, so it's ok if its NULL */
2011-06-01 02:48:57 +04:00
remotestr = nfs4_pnfs_remotestr ( dsaddrs , gfp_flags ) ;
2011-06-01 02:48:56 +04:00
2010-10-20 08:18:04 +04:00
spin_lock ( & nfs4_ds_cache_lock ) ;
2011-06-01 02:48:57 +04:00
tmp_ds = _data_server_lookup_locked ( dsaddrs ) ;
2010-10-20 08:18:04 +04:00
if ( tmp_ds = = NULL ) {
2011-06-01 02:48:57 +04:00
INIT_LIST_HEAD ( & ds - > ds_addrs ) ;
list_splice_init ( dsaddrs , & ds - > ds_addrs ) ;
2011-06-01 02:48:56 +04:00
ds - > ds_remotestr = remotestr ;
2010-10-20 08:18:04 +04:00
atomic_set ( & ds - > ds_count , 1 ) ;
INIT_LIST_HEAD ( & ds - > ds_node ) ;
ds - > ds_clp = NULL ;
list_add ( & ds - > ds_node , & nfs4_data_server_cache ) ;
2011-06-01 02:48:56 +04:00
dprintk ( " %s add new data server %s \n " , __func__ ,
ds - > ds_remotestr ) ;
2010-10-20 08:18:04 +04:00
} else {
2011-06-01 02:48:57 +04:00
if ( ! _data_server_match_all_addrs_locked ( & tmp_ds - > ds_addrs ,
dsaddrs ) ) {
dprintk ( " %s: multipath address mismatch: %s != %s " ,
__func__ , tmp_ds - > ds_remotestr , remotestr ) ;
}
2011-06-01 02:48:56 +04:00
kfree ( remotestr ) ;
2010-10-20 08:18:04 +04:00
kfree ( ds ) ;
atomic_inc ( & tmp_ds - > ds_count ) ;
2011-06-01 02:48:56 +04:00
dprintk ( " %s data server %s found, inc'ed ds_count to %d \n " ,
__func__ , tmp_ds - > ds_remotestr ,
2010-10-20 08:18:04 +04:00
atomic_read ( & tmp_ds - > ds_count ) ) ;
ds = tmp_ds ;
}
spin_unlock ( & nfs4_ds_cache_lock ) ;
out :
return ds ;
}
/*
2011-06-01 02:48:56 +04:00
* Currently only supports ipv4 , ipv6 and one multi - path address .
2010-10-20 08:18:04 +04:00
*/
2011-06-01 02:48:57 +04:00
static struct nfs4_pnfs_ds_addr *
decode_ds_addr ( struct xdr_stream * streamp , gfp_t gfp_flags )
2010-10-20 08:18:04 +04:00
{
2011-06-01 02:48:57 +04:00
struct nfs4_pnfs_ds_addr * da = NULL ;
2011-06-01 02:48:56 +04:00
char * buf , * portstr ;
u32 port ;
int nlen , rlen ;
2010-10-20 08:18:04 +04:00
int tmp [ 2 ] ;
2011-03-24 23:48:21 +03:00
__be32 * p ;
2011-06-01 02:48:56 +04:00
char * netid , * match_netid ;
2011-06-01 02:48:57 +04:00
size_t len , match_netid_len ;
char * startsep = " " ;
char * endsep = " " ;
2010-10-20 08:18:04 +04:00
/* r_netid */
2011-03-24 23:48:21 +03:00
p = xdr_inline_decode ( streamp , 4 ) ;
if ( unlikely ( ! p ) )
goto out_err ;
2010-10-20 08:18:04 +04:00
nlen = be32_to_cpup ( p + + ) ;
2011-03-24 23:48:21 +03:00
p = xdr_inline_decode ( streamp , nlen ) ;
if ( unlikely ( ! p ) )
goto out_err ;
2010-10-20 08:18:04 +04:00
2011-06-01 02:48:56 +04:00
netid = kmalloc ( nlen + 1 , gfp_flags ) ;
if ( unlikely ( ! netid ) )
2010-10-20 08:18:04 +04:00
goto out_err ;
2011-06-01 02:48:56 +04:00
netid [ nlen ] = ' \0 ' ;
memcpy ( netid , p , nlen ) ;
/* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */
2011-03-24 23:48:21 +03:00
p = xdr_inline_decode ( streamp , 4 ) ;
if ( unlikely ( ! p ) )
2011-06-01 02:48:56 +04:00
goto out_free_netid ;
2011-03-24 23:48:21 +03:00
rlen = be32_to_cpup ( p ) ;
p = xdr_inline_decode ( streamp , rlen ) ;
if ( unlikely ( ! p ) )
2011-06-01 02:48:56 +04:00
goto out_free_netid ;
2011-03-24 23:48:21 +03:00
2011-06-01 02:48:56 +04:00
/* port is ".ABC.DEF", 8 chars max */
if ( rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8 ) {
2011-01-17 21:41:50 +03:00
dprintk ( " %s: Invalid address, length %d \n " , __func__ ,
2010-10-20 08:18:04 +04:00
rlen ) ;
2011-06-01 02:48:56 +04:00
goto out_free_netid ;
2010-10-20 08:18:04 +04:00
}
2011-05-12 02:00:51 +04:00
buf = kmalloc ( rlen + 1 , gfp_flags ) ;
2011-02-06 02:13:01 +03:00
if ( ! buf ) {
dprintk ( " %s: Not enough memory \n " , __func__ ) ;
2011-06-01 02:48:56 +04:00
goto out_free_netid ;
2011-02-06 02:13:01 +03:00
}
2010-10-20 08:18:04 +04:00
buf [ rlen ] = ' \0 ' ;
2011-03-24 23:48:21 +03:00
memcpy ( buf , p , rlen ) ;
2010-10-20 08:18:04 +04:00
2011-06-01 02:48:56 +04:00
/* replace port '.' with '-' */
portstr = strrchr ( buf , ' . ' ) ;
if ( ! portstr ) {
dprintk ( " %s: Failed finding expected dot in port \n " ,
__func__ ) ;
goto out_free_buf ;
}
* portstr = ' - ' ;
/* find '.' between address and port */
portstr = strrchr ( buf , ' . ' ) ;
if ( ! portstr ) {
dprintk ( " %s: Failed finding expected dot between address and "
" port \n " , __func__ ) ;
goto out_free_buf ;
2010-10-20 08:18:04 +04:00
}
2011-06-01 02:48:56 +04:00
* portstr = ' \0 ' ;
2010-10-20 08:18:04 +04:00
2011-06-01 02:48:57 +04:00
da = kzalloc ( sizeof ( * da ) , gfp_flags ) ;
if ( unlikely ( ! da ) )
2011-06-01 02:48:56 +04:00
goto out_free_buf ;
2011-06-01 02:48:57 +04:00
INIT_LIST_HEAD ( & da - > da_node ) ;
if ( ! rpc_pton ( buf , portstr - buf , ( struct sockaddr * ) & da - > da_addr ,
sizeof ( da - > da_addr ) ) ) {
dprintk ( " %s: error parsing address %s \n " , __func__ , buf ) ;
goto out_free_da ;
2010-10-20 08:18:04 +04:00
}
2011-06-01 02:48:56 +04:00
portstr + + ;
sscanf ( portstr , " %d-%d " , & tmp [ 0 ] , & tmp [ 1 ] ) ;
2010-10-20 08:18:04 +04:00
port = htons ( ( tmp [ 0 ] < < 8 ) | ( tmp [ 1 ] ) ) ;
2011-06-01 02:48:57 +04:00
switch ( da - > da_addr . ss_family ) {
2011-06-01 02:48:56 +04:00
case AF_INET :
2011-06-01 02:48:57 +04:00
( ( struct sockaddr_in * ) & da - > da_addr ) - > sin_port = port ;
da - > da_addrlen = sizeof ( struct sockaddr_in ) ;
2011-06-01 02:48:56 +04:00
match_netid = " tcp " ;
match_netid_len = 3 ;
break ;
case AF_INET6 :
2011-06-01 02:48:57 +04:00
( ( struct sockaddr_in6 * ) & da - > da_addr ) - > sin6_port = port ;
da - > da_addrlen = sizeof ( struct sockaddr_in6 ) ;
2011-06-01 02:48:56 +04:00
match_netid = " tcp6 " ;
match_netid_len = 4 ;
2011-06-01 02:48:57 +04:00
startsep = " [ " ;
endsep = " ] " ;
2011-06-01 02:48:56 +04:00
break ;
default :
dprintk ( " %s: unsupported address family: %u \n " ,
2011-06-01 02:48:57 +04:00
__func__ , da - > da_addr . ss_family ) ;
goto out_free_da ;
2011-06-01 02:48:56 +04:00
}
if ( nlen ! = match_netid_len | | strncmp ( netid , match_netid , nlen ) ) {
dprintk ( " %s: ERROR: r_netid \" %s \" != \" %s \" \n " ,
__func__ , netid , match_netid ) ;
2011-06-01 02:48:57 +04:00
goto out_free_da ;
2011-06-01 02:48:56 +04:00
}
2011-06-01 02:48:57 +04:00
/* save human readable address */
len = strlen ( startsep ) + strlen ( buf ) + strlen ( endsep ) + 7 ;
da - > da_remotestr = kzalloc ( len , gfp_flags ) ;
/* NULL is ok, only used for dprintk */
if ( da - > da_remotestr )
snprintf ( da - > da_remotestr , len , " %s%s%s:%u " , startsep ,
buf , endsep , ntohs ( port ) ) ;
dprintk ( " %s: Parsed DS addr %s \n " , __func__ , da - > da_remotestr ) ;
kfree ( buf ) ;
kfree ( netid ) ;
return da ;
out_free_da :
kfree ( da ) ;
2011-06-01 02:48:56 +04:00
out_free_buf :
2011-06-01 02:48:57 +04:00
dprintk ( " %s: Error parsing DS addr: %s \n " , __func__ , buf ) ;
2010-10-20 08:18:04 +04:00
kfree ( buf ) ;
2011-06-01 02:48:56 +04:00
out_free_netid :
kfree ( netid ) ;
2010-10-20 08:18:04 +04:00
out_err :
2011-06-01 02:48:57 +04:00
return NULL ;
2010-10-20 08:18:04 +04:00
}
/* Decode opaque device data and return the result */
static struct nfs4_file_layout_dsaddr *
2011-05-12 02:00:51 +04:00
decode_device ( struct inode * ino , struct pnfs_device * pdev , gfp_t gfp_flags )
2010-10-20 08:18:04 +04:00
{
2011-03-24 23:48:21 +03:00
int i ;
2010-10-20 08:18:04 +04:00
u32 cnt , num ;
u8 * indexp ;
2011-03-24 23:48:21 +03:00
__be32 * p ;
u8 * stripe_indices ;
u8 max_stripe_index ;
struct nfs4_file_layout_dsaddr * dsaddr = NULL ;
struct xdr_stream stream ;
2011-05-19 22:16:47 +04:00
struct xdr_buf buf ;
2011-03-24 23:48:21 +03:00
struct page * scratch ;
2011-06-01 02:48:57 +04:00
struct list_head dsaddrs ;
struct nfs4_pnfs_ds_addr * da ;
2011-03-24 23:48:21 +03:00
/* set up xdr stream */
2011-05-12 02:00:51 +04:00
scratch = alloc_page ( gfp_flags ) ;
2011-03-24 23:48:21 +03:00
if ( ! scratch )
goto out_err ;
2011-05-19 22:16:47 +04:00
xdr_init_decode_pages ( & stream , & buf , pdev - > pages , pdev - > pglen ) ;
2011-03-24 23:48:21 +03:00
xdr_set_scratch_buffer ( & stream , page_address ( scratch ) , PAGE_SIZE ) ;
2010-10-20 08:18:04 +04:00
/* Get the stripe count (number of stripe index) */
2011-03-24 23:48:21 +03:00
p = xdr_inline_decode ( & stream , 4 ) ;
if ( unlikely ( ! p ) )
goto out_err_free_scratch ;
cnt = be32_to_cpup ( p ) ;
2010-10-20 08:18:04 +04:00
dprintk ( " %s stripe count %d \n " , __func__ , cnt ) ;
if ( cnt > NFS4_PNFS_MAX_STRIPE_CNT ) {
printk ( KERN_WARNING " %s: stripe count %d greater than "
" supported maximum %d \n " , __func__ ,
cnt , NFS4_PNFS_MAX_STRIPE_CNT ) ;
2011-03-24 23:48:21 +03:00
goto out_err_free_scratch ;
}
/* read stripe indices */
2011-05-12 02:00:51 +04:00
stripe_indices = kcalloc ( cnt , sizeof ( u8 ) , gfp_flags ) ;
2011-03-24 23:48:21 +03:00
if ( ! stripe_indices )
goto out_err_free_scratch ;
p = xdr_inline_decode ( & stream , cnt < < 2 ) ;
if ( unlikely ( ! p ) )
goto out_err_free_stripe_indices ;
indexp = & stripe_indices [ 0 ] ;
max_stripe_index = 0 ;
for ( i = 0 ; i < cnt ; i + + ) {
* indexp = be32_to_cpup ( p + + ) ;
max_stripe_index = max ( max_stripe_index , * indexp ) ;
indexp + + ;
2010-10-20 08:18:04 +04:00
}
/* Check the multipath list count */
2011-03-24 23:48:21 +03:00
p = xdr_inline_decode ( & stream , 4 ) ;
if ( unlikely ( ! p ) )
goto out_err_free_stripe_indices ;
num = be32_to_cpup ( p ) ;
2010-10-20 08:18:04 +04:00
dprintk ( " %s ds_num %u \n " , __func__ , num ) ;
if ( num > NFS4_PNFS_MAX_MULTI_CNT ) {
printk ( KERN_WARNING " %s: multipath count %d greater than "
" supported maximum %d \n " , __func__ ,
num , NFS4_PNFS_MAX_MULTI_CNT ) ;
2011-03-24 23:48:21 +03:00
goto out_err_free_stripe_indices ;
2010-10-20 08:18:04 +04:00
}
2011-03-24 23:48:21 +03:00
/* validate stripe indices are all < num */
if ( max_stripe_index > = num ) {
printk ( KERN_WARNING " %s: stripe index %u >= num ds %u \n " ,
__func__ , max_stripe_index , num ) ;
goto out_err_free_stripe_indices ;
}
2010-10-20 08:18:04 +04:00
dsaddr = kzalloc ( sizeof ( * dsaddr ) +
( sizeof ( struct nfs4_pnfs_ds * ) * ( num - 1 ) ) ,
2011-05-12 02:00:51 +04:00
gfp_flags ) ;
2010-10-20 08:18:04 +04:00
if ( ! dsaddr )
2011-03-24 23:48:21 +03:00
goto out_err_free_stripe_indices ;
2010-10-20 08:18:04 +04:00
dsaddr - > stripe_count = cnt ;
2011-03-24 23:48:21 +03:00
dsaddr - > stripe_indices = stripe_indices ;
stripe_indices = NULL ;
2010-10-20 08:18:04 +04:00
dsaddr - > ds_num = num ;
2011-05-20 15:47:33 +04:00
nfs4_init_deviceid_node ( & dsaddr - > id_node ,
NFS_SERVER ( ino ) - > pnfs_curr_ld ,
NFS_SERVER ( ino ) - > nfs_client ,
2011-05-20 06:14:47 +04:00
& pdev - > dev_id ) ;
2010-10-20 08:18:04 +04:00
2011-06-01 02:48:57 +04:00
INIT_LIST_HEAD ( & dsaddrs ) ;
2010-10-20 08:18:04 +04:00
for ( i = 0 ; i < dsaddr - > ds_num ; i + + ) {
int j ;
2011-03-24 23:48:21 +03:00
u32 mp_count ;
p = xdr_inline_decode ( & stream , 4 ) ;
if ( unlikely ( ! p ) )
goto out_err_free_deviceid ;
2010-10-20 08:18:04 +04:00
2011-03-24 23:48:21 +03:00
mp_count = be32_to_cpup ( p ) ; /* multipath count */
for ( j = 0 ; j < mp_count ; j + + ) {
2011-06-01 02:48:57 +04:00
da = decode_ds_addr ( & stream , gfp_flags ) ;
if ( da )
list_add_tail ( & da - > da_node , & dsaddrs ) ;
}
if ( list_empty ( & dsaddrs ) ) {
dprintk ( " %s: no suitable DS addresses found \n " ,
__func__ ) ;
goto out_err_free_deviceid ;
}
dsaddr - > ds_list [ i ] = nfs4_pnfs_ds_add ( & dsaddrs , gfp_flags ) ;
if ( ! dsaddr - > ds_list [ i ] )
goto out_err_drain_dsaddrs ;
/* If DS was already in cache, free ds addrs */
while ( ! list_empty ( & dsaddrs ) ) {
da = list_first_entry ( & dsaddrs ,
struct nfs4_pnfs_ds_addr ,
da_node ) ;
list_del_init ( & da - > da_node ) ;
kfree ( da - > da_remotestr ) ;
kfree ( da ) ;
2010-10-20 08:18:04 +04:00
}
}
2011-03-24 23:48:21 +03:00
__free_page ( scratch ) ;
2010-10-20 08:18:04 +04:00
return dsaddr ;
2011-06-01 02:48:57 +04:00
out_err_drain_dsaddrs :
while ( ! list_empty ( & dsaddrs ) ) {
da = list_first_entry ( & dsaddrs , struct nfs4_pnfs_ds_addr ,
da_node ) ;
list_del_init ( & da - > da_node ) ;
kfree ( da - > da_remotestr ) ;
kfree ( da ) ;
}
2011-03-24 23:48:21 +03:00
out_err_free_deviceid :
2010-10-20 08:18:04 +04:00
nfs4_fl_free_deviceid ( dsaddr ) ;
2011-03-24 23:48:21 +03:00
/* stripe_indicies was part of dsaddr */
goto out_err_free_scratch ;
out_err_free_stripe_indices :
kfree ( stripe_indices ) ;
out_err_free_scratch :
__free_page ( scratch ) ;
2010-10-20 08:18:04 +04:00
out_err :
dprintk ( " %s ERROR: returning NULL \n " , __func__ ) ;
return NULL ;
}
/*
2011-03-01 04:34:21 +03:00
* Decode the opaque device specified in ' dev ' and add it to the cache of
* available devices .
2010-10-20 08:18:04 +04:00
*/
2011-03-01 04:34:21 +03:00
static struct nfs4_file_layout_dsaddr *
2011-05-12 02:00:51 +04:00
decode_and_add_device ( struct inode * inode , struct pnfs_device * dev , gfp_t gfp_flags )
2010-10-20 08:18:04 +04:00
{
2011-05-20 06:14:47 +04:00
struct nfs4_deviceid_node * d ;
struct nfs4_file_layout_dsaddr * n , * new ;
2010-10-20 08:18:04 +04:00
2011-05-12 02:00:51 +04:00
new = decode_device ( inode , dev , gfp_flags ) ;
2011-03-01 04:34:21 +03:00
if ( ! new ) {
2010-10-20 08:18:04 +04:00
printk ( KERN_WARNING " %s: Could not decode or add device \n " ,
__func__ ) ;
return NULL ;
}
2011-05-20 06:14:47 +04:00
d = nfs4_insert_deviceid_node ( & new - > id_node ) ;
n = container_of ( d , struct nfs4_file_layout_dsaddr , id_node ) ;
if ( n ! = new ) {
2011-03-01 04:34:21 +03:00
nfs4_fl_free_deviceid ( new ) ;
2011-05-20 06:14:47 +04:00
return n ;
2011-03-01 04:34:21 +03:00
}
return new ;
2010-10-20 08:18:04 +04:00
}
/*
* Retrieve the information for dev_id , add it to the list
* of available devices , and return it .
*/
struct nfs4_file_layout_dsaddr *
2011-05-12 02:00:51 +04:00
get_device_info ( struct inode * inode , struct nfs4_deviceid * dev_id , gfp_t gfp_flags )
2010-10-20 08:18:04 +04:00
{
struct pnfs_device * pdev = NULL ;
u32 max_resp_sz ;
int max_pages ;
struct page * * pages = NULL ;
struct nfs4_file_layout_dsaddr * dsaddr = NULL ;
int rc , i ;
struct nfs_server * server = NFS_SERVER ( inode ) ;
/*
* Use the session max response size as the basis for setting
* GETDEVICEINFO ' s maxcount
*/
max_resp_sz = server - > nfs_client - > cl_session - > fc_attrs . max_resp_sz ;
max_pages = max_resp_sz > > PAGE_SHIFT ;
dprintk ( " %s inode %p max_resp_sz %u max_pages %d \n " ,
__func__ , inode , max_resp_sz , max_pages ) ;
2011-05-12 02:00:51 +04:00
pdev = kzalloc ( sizeof ( struct pnfs_device ) , gfp_flags ) ;
2010-10-20 08:18:04 +04:00
if ( pdev = = NULL )
return NULL ;
2011-05-12 02:00:51 +04:00
pages = kzalloc ( max_pages * sizeof ( struct page * ) , gfp_flags ) ;
2010-10-20 08:18:04 +04:00
if ( pages = = NULL ) {
kfree ( pdev ) ;
return NULL ;
}
for ( i = 0 ; i < max_pages ; i + + ) {
2011-05-12 02:00:51 +04:00
pages [ i ] = alloc_page ( gfp_flags ) ;
2010-10-20 08:18:04 +04:00
if ( ! pages [ i ] )
goto out_free ;
}
memcpy ( & pdev - > dev_id , dev_id , sizeof ( * dev_id ) ) ;
pdev - > layout_type = LAYOUT_NFSV4_1_FILES ;
pdev - > pages = pages ;
pdev - > pgbase = 0 ;
pdev - > pglen = PAGE_SIZE * max_pages ;
pdev - > mincount = 0 ;
rc = nfs4_proc_getdeviceinfo ( server , pdev ) ;
dprintk ( " %s getdevice info returns %d \n " , __func__ , rc ) ;
if ( rc )
goto out_free ;
/*
* Found new device , need to decode it and then add it to the
* list of known devices for this mountpoint .
*/
2011-05-12 02:00:51 +04:00
dsaddr = decode_and_add_device ( inode , pdev , gfp_flags ) ;
2010-10-20 08:18:04 +04:00
out_free :
for ( i = 0 ; i < max_pages ; i + + )
__free_page ( pages [ i ] ) ;
kfree ( pages ) ;
kfree ( pdev ) ;
dprintk ( " <-- %s dsaddr %p \n " , __func__ , dsaddr ) ;
return dsaddr ;
}
2011-03-01 04:34:21 +03:00
void
nfs4_fl_put_deviceid ( struct nfs4_file_layout_dsaddr * dsaddr )
2010-10-20 08:18:04 +04:00
{
2011-05-20 15:47:33 +04:00
nfs4_put_deviceid_node ( & dsaddr - > id_node ) ;
2010-10-20 08:18:04 +04:00
}
2011-03-01 04:34:18 +03:00
/*
 * Compute the stripe slot j for a file offset:
 *
 *   res = (offset - layout->pattern_offset) / layout->stripe_unit
 *   j   = (res + first_stripe_index) % dsaddr->stripe_count
 */
u32
nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset)
{
	struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
	u64 stripe_no = offset - flseg->pattern_offset;

	/* do_div() divides in place and evaluates to the remainder */
	do_div(stripe_no, flseg->stripe_unit);
	stripe_no += flseg->first_stripe_index;

	return do_div(stripe_no, flseg->dsaddr->stripe_count);
}
/*
 * Map stripe slot @j to a data-server index through the device's
 * stripe_indices table.
 */
u32
nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j)
{
	struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr;

	return dsaddr->stripe_indices[j];
}
/*
 * Pick the file handle to use for stripe slot @j.
 *
 * When stripe_type is not STRIPE_SPARSE the handle is fh_array[j].
 * For STRIPE_SPARSE: with one handle it is always used, with none
 * NULL is returned, otherwise the handle is chosen by data-server index.
 */
struct nfs_fh *
nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
{
	struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);

	if (flseg->stripe_type != STRIPE_SPARSE)
		return flseg->fh_array[j];

	if (flseg->num_fh == 1)
		return flseg->fh_array[0];

	/* Use the MDS OPEN fh set in nfs_read_rpcsetup */
	if (flseg->num_fh == 0)
		return NULL;

	return flseg->fh_array[nfs4_fl_calc_ds_index(lseg, j)];
}
2011-03-01 04:34:22 +03:00
static void
filelayout_mark_devid_negative ( struct nfs4_file_layout_dsaddr * dsaddr ,
2011-06-01 02:48:56 +04:00
int err , const char * ds_remotestr )
2011-03-01 04:34:22 +03:00
{
2011-05-20 06:14:47 +04:00
u32 * p = ( u32 * ) & dsaddr - > id_node . deviceid ;
2011-03-01 04:34:22 +03:00
2011-06-01 02:48:56 +04:00
printk ( KERN_ERR " NFS: data server %s connection error %d. "
2011-03-01 04:34:22 +03:00
" Deviceid [%x%x%x%x] marked out of use. \n " ,
2011-06-01 02:48:56 +04:00
ds_remotestr , err , p [ 0 ] , p [ 1 ] , p [ 2 ] , p [ 3 ] ) ;
2011-03-01 04:34:22 +03:00
2011-05-20 06:14:47 +04:00
spin_lock ( & nfs4_ds_cache_lock ) ;
2011-03-01 04:34:22 +03:00
dsaddr - > flags | = NFS4_DEVICE_ID_NEG_ENTRY ;
2011-05-20 06:14:47 +04:00
spin_unlock ( & nfs4_ds_cache_lock ) ;
2011-03-01 04:34:22 +03:00
}
2011-03-01 04:34:18 +03:00
/*
 * Return the data server at @ds_idx for this layout segment, connecting
 * to it first if it has no client yet.
 *
 * Returns NULL when the slot is empty, when the device id is already
 * flagged NFS4_DEVICE_ID_NEG_ENTRY, or when the connection attempt
 * fails (in which case the device id gets flagged).
 */
struct nfs4_pnfs_ds *
nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
{
	struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr;
	struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
	struct nfs_server *s;
	int err;

	if (ds == NULL) {
		printk(KERN_ERR " %s: No data server for offset index %d \n ",
		       __func__, ds_idx);
		return NULL;
	}

	/* already connected */
	if (ds->ds_clp)
		return ds;

	s = NFS_SERVER(lseg->pls_layout->plh_inode);

	if (dsaddr->flags & NFS4_DEVICE_ID_NEG_ENTRY) {
		/* Already tried to connect, don't try again */
		dprintk(" %s Deviceid marked out of use \n ", __func__);
		return NULL;
	}

	err = nfs4_ds_connect(s, ds);
	if (err) {
		filelayout_mark_devid_negative(dsaddr, err,
					       ds->ds_remotestr);
		return NULL;
	}

	return ds;
}