/*
 *  pNFS functions to call and manage layout drivers.
 *
 *  Copyright (c) 2002 [year of first publication]
 *  The Regents of the University of Michigan
 *  All Rights Reserved
 *
 *  Dean Hildebrand <dhildebz@umich.edu>
 *
 *  Permission is granted to use, copy, create derivative works, and
 *  redistribute this software and such derivative works for any purpose,
 *  so long as the name of the University of Michigan is not used in
 *  any advertising or publicity pertaining to the use or distribution
 *  of this software without specific, written prior authorization. If
 *  the above copyright notice or any other identification of the
 *  University of Michigan is included in any copy of any portion of
 *  this software, then the disclaimer below must also be included.
 *
 *  This software is provided as is, without representation or warranty
 *  of any kind either express or implied, including without limitation
 *  the implied warranties of merchantability, fitness for a particular
 *  purpose, or noninfringement.  The Regents of the University of
 *  Michigan shall not be liable for any damages, including special,
 *  indirect, incidental, or consequential damages, with respect to any
 *  claim arising out of or in connection with the use of the software,
 *  even if it has been or is hereafter advised of the possibility of
 *  such damages.
 */
#include <linux/nfs_fs.h>
#include "internal.h"
#include "pnfs.h"
#include "iostat.h"

#define NFSDBG_FACILITY		NFSDBG_PNFS

/* Locking:
 *
 * pnfs_spinlock:
 *      protects pnfs_modules_tbl.
 */
static DEFINE_SPINLOCK(pnfs_spinlock);

/*
 * pnfs_modules_tbl holds all pnfs modules
 */
static LIST_HEAD(pnfs_modules_tbl);
/* Return the registered pnfs layout driver module matching given id */
static struct pnfs_layoutdriver_type *
find_pnfs_driver_locked(u32 id)
{
	struct pnfs_layoutdriver_type *local;

	list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid)
		if (local->id == id)
			goto out;
	local = NULL;
out:
	dprintk("%s: Searching for id %u, found %p\n", __func__, id, local);
	return local;
}

static struct pnfs_layoutdriver_type *
find_pnfs_driver(u32 id)
{
	struct pnfs_layoutdriver_type *local;

	spin_lock(&pnfs_spinlock);
	local = find_pnfs_driver_locked(id);
	spin_unlock(&pnfs_spinlock);
	return local;
}

void
unset_pnfs_layoutdriver(struct nfs_server *nfss)
{
	if (nfss->pnfs_curr_ld)
		module_put(nfss->pnfs_curr_ld->owner);
	nfss->pnfs_curr_ld = NULL;
}
/*
 * Try to set the server's pnfs module to the pnfs layout type specified by id.
 * Currently only one pNFS layout driver per filesystem is supported.
 *
 * @id layout type. Zero (illegal layout type) indicates pNFS not in use.
 */
void
set_pnfs_layoutdriver(struct nfs_server *server, u32 id)
{
	struct pnfs_layoutdriver_type *ld_type = NULL;

	if (id == 0)
		goto out_no_driver;
	if (!(server->nfs_client->cl_exchange_flags &
		 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
		printk(KERN_ERR "%s: id %u cl_exchange_flags 0x%x\n", __func__,
		       id, server->nfs_client->cl_exchange_flags);
		goto out_no_driver;
	}
	ld_type = find_pnfs_driver(id);
	if (!ld_type) {
		request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
		ld_type = find_pnfs_driver(id);
		if (!ld_type) {
			dprintk("%s: No pNFS module found for %u.\n",
				__func__, id);
			goto out_no_driver;
		}
	}
	if (!try_module_get(ld_type->owner)) {
		dprintk("%s: Could not grab reference on module\n", __func__);
		goto out_no_driver;
	}
	server->pnfs_curr_ld = ld_type;

	dprintk("%s: pNFS module for %u set\n", __func__, id);
	return;

out_no_driver:
	dprintk("%s: Using NFSv4 I/O\n", __func__);
	server->pnfs_curr_ld = NULL;
}
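
/*
 * Example of the autoload path above: for a layout type id of 1 (the
 * files layout) the request_module() call resolves to a module alias of
 * the form LAYOUT_NFSV4_1_MODULE_PREFIX "-1", assuming the layout driver
 * module advertises a matching MODULE_ALIAS.  When no driver can be
 * found or pinned, the out_no_driver path leaves pnfs_curr_ld NULL and
 * I/O falls back to plain NFSv4 through the MDS.
 */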

int
pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
{
	int status = -EINVAL;
	struct pnfs_layoutdriver_type *tmp;

	if (ld_type->id == 0) {
		printk(KERN_ERR "%s id 0 is reserved\n", __func__);
		return status;
	}
	if (!ld_type->alloc_lseg || !ld_type->free_lseg) {
		printk(KERN_ERR "%s Layout driver must provide "
		       "alloc_lseg and free_lseg.\n", __func__);
		return status;
	}

	spin_lock(&pnfs_spinlock);
	tmp = find_pnfs_driver_locked(ld_type->id);
	if (!tmp) {
		list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl);
		status = 0;
		dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id,
			ld_type->name);
	} else {
		printk(KERN_ERR "%s Module with id %d already loaded!\n",
		       __func__, ld_type->id);
	}
	spin_unlock(&pnfs_spinlock);

	return status;
}
EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver);

void
pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
{
	dprintk("%s Deregistering id:%u\n", __func__, ld_type->id);
	spin_lock(&pnfs_spinlock);
	list_del(&ld_type->pnfs_tblid);
	spin_unlock(&pnfs_spinlock);
}
EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);

/*
 * pNFS client layout cache
 */

/* Need to hold i_lock if caller does not already hold reference */
void
get_layout_hdr(struct pnfs_layout_hdr *lo)
{
	atomic_inc(&lo->plh_refcount);
}

static struct pnfs_layout_hdr *
pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags)
{
	struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
	return ld->alloc_layout_hdr ? ld->alloc_layout_hdr(ino, gfp_flags) :
		kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags);
}

static void
pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct pnfs_layoutdriver_type *ld = NFS_SERVER(lo->plh_inode)->pnfs_curr_ld;
	return ld->alloc_layout_hdr ? ld->free_layout_hdr(lo) : kfree(lo);
}

static void
destroy_layout_hdr(struct pnfs_layout_hdr *lo)
{
	dprintk("%s: freeing layout cache %p\n", __func__, lo);
	BUG_ON(!list_empty(&lo->plh_layouts));
	NFS_I(lo->plh_inode)->layout = NULL;
	pnfs_free_layout_hdr(lo);
}

static void
put_layout_hdr_locked(struct pnfs_layout_hdr *lo)
{
	if (atomic_dec_and_test(&lo->plh_refcount))
		destroy_layout_hdr(lo);
}

void
put_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct inode *inode = lo->plh_inode;

	if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
		destroy_layout_hdr(lo);
		spin_unlock(&inode->i_lock);
	}
}

static void
init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
{
	INIT_LIST_HEAD(&lseg->pls_list);
	atomic_set(&lseg->pls_refcount, 1);
	smp_mb();
	set_bit(NFS_LSEG_VALID, &lseg->pls_flags);
	lseg->pls_layout = lo;
}

static void free_lseg(struct pnfs_layout_segment *lseg)
{
	struct inode *ino = lseg->pls_layout->plh_inode;

	NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
	/* Matched by get_layout_hdr in pnfs_insert_layout */
	put_layout_hdr(NFS_I(ino)->layout);
}

static void
put_lseg_common(struct pnfs_layout_segment *lseg)
{
	struct inode *inode = lseg->pls_layout->plh_inode;

	WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
	list_del_init(&lseg->pls_list);
	if (list_empty(&lseg->pls_layout->plh_segs)) {
		set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags);
		/* Matched by initial refcount set in alloc_init_layout_hdr */
		put_layout_hdr_locked(lseg->pls_layout);
	}
	rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
}

void
put_lseg(struct pnfs_layout_segment *lseg)
{
	struct inode *inode;

	if (!lseg)
		return;

	dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
		atomic_read(&lseg->pls_refcount),
		test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
	inode = lseg->pls_layout->plh_inode;
	if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
		LIST_HEAD(free_me);

		put_lseg_common(lseg);
		list_add(&lseg->pls_list, &free_me);
		spin_unlock(&inode->i_lock);
		pnfs_free_lseg_list(&free_me);
	}
}
EXPORT_SYMBOL_GPL(put_lseg);

static inline u64
end_offset(u64 start, u64 len)
{
	u64 end;

	end = start + len;
	return end >= start ? end : NFS4_MAX_UINT64;
}

/* last octet in a range */
static inline u64
last_byte_offset(u64 start, u64 len)
{
	u64 end;

	BUG_ON(!len);
	end = start + len;
	return end > start ? end - 1 : NFS4_MAX_UINT64;
}
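
/*
 * A quick sanity example of the saturating arithmetic above:
 * end_offset(4096, 8192) is 12288, while end_offset(4096, NFS4_MAX_UINT64)
 * wraps on the unsigned addition (end < start) and is therefore reported
 * as NFS4_MAX_UINT64, i.e. the range is treated as extending to the end
 * of the file.  last_byte_offset() behaves the same way but returns the
 * final byte (end - 1) for bounded ranges.
 */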

/*
 * is l2 fully contained in l1?
 *   start1                             end1
 *   [----------------------------------)
 *           start2           end2
 *           [----------------)
 */
static inline int
lo_seg_contained(struct pnfs_layout_range *l1,
		 struct pnfs_layout_range *l2)
{
	u64 start1 = l1->offset;
	u64 end1 = end_offset(start1, l1->length);
	u64 start2 = l2->offset;
	u64 end2 = end_offset(start2, l2->length);

	return (start1 <= start2) && (end1 >= end2);
}

/*
 * are l1 and l2 intersecting?
 *   start1                             end1
 *   [----------------------------------)
 *                              start2           end2
 *                              [----------------)
 */
static inline int
lo_seg_intersecting(struct pnfs_layout_range *l1,
		    struct pnfs_layout_range *l2)
{
	u64 start1 = l1->offset;
	u64 end1 = end_offset(start1, l1->length);
	u64 start2 = l2->offset;
	u64 end2 = end_offset(start2, l2->length);

	return (end1 == NFS4_MAX_UINT64 || end1 > start2) &&
	       (end2 == NFS4_MAX_UINT64 || end2 > start1);
}

static bool
should_free_lseg(struct pnfs_layout_range *lseg_range,
		 struct pnfs_layout_range *recall_range)
{
	return (recall_range->iomode == IOMODE_ANY ||
		lseg_range->iomode == recall_range->iomode) &&
	       lo_seg_intersecting(lseg_range, recall_range);
}

/* Returns 1 if lseg is removed from list, 0 otherwise */
static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
			     struct list_head *tmp_list)
{
	int rv = 0;

	if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
		/* Remove the reference keeping the lseg in the
		 * list.  It will now be removed when all
		 * outstanding io is finished.
		 */
		dprintk("%s: lseg %p ref %d\n", __func__, lseg,
			atomic_read(&lseg->pls_refcount));
		if (atomic_dec_and_test(&lseg->pls_refcount)) {
			put_lseg_common(lseg);
			list_add(&lseg->pls_list, tmp_list);
			rv = 1;
		}
	}
	return rv;
}

/* Returns the number of matching invalid lsegs remaining in the list
 * after the call.
 */
int
mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
			    struct list_head *tmp_list,
			    struct pnfs_layout_range *recall_range)
{
	struct pnfs_layout_segment *lseg, *next;
	int invalid = 0, removed = 0;

	dprintk("%s:Begin lo %p\n", __func__, lo);

	if (list_empty(&lo->plh_segs)) {
		if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags))
			put_layout_hdr_locked(lo);
		return 0;
	}
	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
		if (!recall_range ||
		    should_free_lseg(&lseg->pls_range, recall_range)) {
			dprintk("%s: freeing lseg %p iomode %d "
				"offset %llu length %llu\n", __func__,
				lseg, lseg->pls_range.iomode, lseg->pls_range.offset,
				lseg->pls_range.length);
			invalid++;
			removed += mark_lseg_invalid(lseg, tmp_list);
		}
	dprintk("%s:Return %i\n", __func__, invalid - removed);
	return invalid - removed;
}

/* note free_me must contain lsegs from a single layout_hdr */
void
pnfs_free_lseg_list(struct list_head *free_me)
{
	struct pnfs_layout_segment *lseg, *tmp;
	struct pnfs_layout_hdr *lo;

	if (list_empty(free_me))
		return;

	lo = list_first_entry(free_me, struct pnfs_layout_segment,
			      pls_list)->pls_layout;

	if (test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) {
		struct nfs_client *clp;

		clp = NFS_SERVER(lo->plh_inode)->nfs_client;
		spin_lock(&clp->cl_lock);
		list_del_init(&lo->plh_layouts);
		spin_unlock(&clp->cl_lock);
	}
	list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
		list_del(&lseg->pls_list);
		free_lseg(lseg);
	}
}

void
pnfs_destroy_layout(struct nfs_inode *nfsi)
{
	struct pnfs_layout_hdr *lo;
	LIST_HEAD(tmp_list);

	spin_lock(&nfsi->vfs_inode.i_lock);
	lo = nfsi->layout;
	if (lo) {
		lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
		mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
	}
	spin_unlock(&nfsi->vfs_inode.i_lock);
	pnfs_free_lseg_list(&tmp_list);
}

/*
 * Called by the state manager to remove all layouts established under an
 * expired lease.
 */
void
pnfs_destroy_all_layouts(struct nfs_client *clp)
{
	struct pnfs_layout_hdr *lo;
	LIST_HEAD(tmp_list);

	spin_lock(&clp->cl_lock);
	list_splice_init(&clp->cl_layouts, &tmp_list);
	spin_unlock(&clp->cl_lock);
	while (!list_empty(&tmp_list)) {
		lo = list_entry(tmp_list.next, struct pnfs_layout_hdr,
				plh_layouts);
		dprintk("%s freeing layout for inode %lu\n", __func__,
			lo->plh_inode->i_ino);
		list_del_init(&lo->plh_layouts);
		pnfs_destroy_layout(NFS_I(lo->plh_inode));
	}
}

/* update lo->plh_stateid with new if it is more recent */
void
pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
			bool update_barrier)
{
	u32 oldseq, newseq;

	oldseq = be32_to_cpu(lo->plh_stateid.stateid.seqid);
	newseq = be32_to_cpu(new->stateid.seqid);
	if ((int)(newseq - oldseq) > 0) {
		memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid));
		if (update_barrier) {
			u32 new_barrier = be32_to_cpu(new->stateid.seqid);

			if ((int)(new_barrier - lo->plh_barrier))
				lo->plh_barrier = new_barrier;
		} else {
			/* Because of wraparound, we want to keep the barrier
			 * "close" to the current seqids.  It needs to be
			 * within 2**31 to count as "behind", so if it
			 * gets too near that limit, give us a little leeway
			 * and bring it to within 2**30.
			 * NOTE - and yes, this is all unsigned arithmetic.
			 */
			if (unlikely((newseq - lo->plh_barrier) > (3 << 29)))
				lo->plh_barrier = newseq - (1 << 30);
		}
	}
}
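
/*
 * Worked example of the wraparound guard above (all u32 arithmetic):
 * with plh_barrier == 10 and newseq == 0xfffffff0, the difference
 * newseq - plh_barrier is far larger than 3 << 29, so the barrier is
 * pulled forward to newseq - (1 << 30), keeping it within 2**30 of the
 * current seqid while still counting as "behind" it.
 */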

/* lget is set to 1 if called from inside send_layoutget call chain */
static bool
pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid,
			int lget)
{
	if ((stateid) &&
	    (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0)
		return true;
	return lo->plh_block_lgets ||
		test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags) ||
		test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
		(list_empty(&lo->plh_segs) &&
		 (atomic_read(&lo->plh_outstanding) > lget));
}

int
pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
			      struct nfs4_state *open_state)
{
	int status = 0;

	dprintk("--> %s\n", __func__);
	spin_lock(&lo->plh_inode->i_lock);
	if (pnfs_layoutgets_blocked(lo, NULL, 1)) {
		status = -EAGAIN;
	} else if (list_empty(&lo->plh_segs)) {
		int seq;

		do {
			seq = read_seqbegin(&open_state->seqlock);
			memcpy(dst->data, open_state->stateid.data,
			       sizeof(open_state->stateid.data));
		} while (read_seqretry(&open_state->seqlock, seq));
	} else
		memcpy(dst->data, lo->plh_stateid.data, sizeof(lo->plh_stateid.data));
	spin_unlock(&lo->plh_inode->i_lock);
	dprintk("<-- %s\n", __func__);
	return status;
}

/*
 * Get layout from server.
 *    for now, assume that whole file layouts are requested.
 *    arg->offset:   0
 *    arg->length:   all ones
 */
static struct pnfs_layout_segment *
send_layoutget(struct pnfs_layout_hdr *lo,
	       struct nfs_open_context *ctx,
	       struct pnfs_layout_range *range,
	       gfp_t gfp_flags)
{
	struct inode *ino = lo->plh_inode;
	struct nfs_server *server = NFS_SERVER(ino);
	struct nfs4_layoutget *lgp;
	struct pnfs_layout_segment *lseg = NULL;
	struct page **pages = NULL;
	int i;
	u32 max_resp_sz, max_pages;

	dprintk("--> %s\n", __func__);

	BUG_ON(ctx == NULL);
	lgp = kzalloc(sizeof(*lgp), gfp_flags);
	if (lgp == NULL)
		return NULL;

	/* allocate pages for xdr post processing */
	max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
	max_pages = max_resp_sz >> PAGE_SHIFT;

	pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags);
	if (!pages)
		goto out_err_free;

	for (i = 0; i < max_pages; i++) {
		pages[i] = alloc_page(gfp_flags);
		if (!pages[i])
			goto out_err_free;
	}

	lgp->args.minlength = PAGE_CACHE_SIZE;
	if (lgp->args.minlength > range->length)
		lgp->args.minlength = range->length;
	lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
	lgp->args.range = *range;
	lgp->args.type = server->pnfs_curr_ld->id;
	lgp->args.inode = ino;
	lgp->args.ctx = get_nfs_open_context(ctx);
	lgp->args.layout.pages = pages;
	lgp->args.layout.pglen = max_pages * PAGE_SIZE;
	lgp->lsegpp = &lseg;
	lgp->gfp_flags = gfp_flags;

	/* Synchronously retrieve layout information from server and
	 * store in lseg.
	 */
	nfs4_proc_layoutget(lgp);
	if (!lseg) {
		/* remember that LAYOUTGET failed and suspend trying */
		set_bit(lo_fail_bit(range->iomode), &lo->plh_flags);
	}

	/* free xdr pages */
	for (i = 0; i < max_pages; i++)
		__free_page(pages[i]);
	kfree(pages);

	return lseg;

out_err_free:
	/* free any allocated xdr pages, lgp as it's not used */
	if (pages) {
		for (i = 0; i < max_pages; i++) {
			if (!pages[i])
				break;
			__free_page(pages[i]);
		}
		kfree(pages);
	}
	kfree(lgp);
	return NULL;
}

bool pnfs_roc(struct inode *ino)
{
	struct pnfs_layout_hdr *lo;
	struct pnfs_layout_segment *lseg, *tmp;
	LIST_HEAD(tmp_list);
	bool found = false;

	spin_lock(&ino->i_lock);
	lo = NFS_I(ino)->layout;
	if (!lo || !test_and_clear_bit(NFS_LAYOUT_ROC, &lo->plh_flags) ||
	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
		goto out_nolayout;
	list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list)
		if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
			mark_lseg_invalid(lseg, &tmp_list);
			found = true;
		}
	if (!found)
		goto out_nolayout;
	lo->plh_block_lgets++;
	get_layout_hdr(lo); /* matched in pnfs_roc_release */
	spin_unlock(&ino->i_lock);
	pnfs_free_lseg_list(&tmp_list);
	return true;

out_nolayout:
	spin_unlock(&ino->i_lock);
	return false;
}

void pnfs_roc_release(struct inode *ino)
{
	struct pnfs_layout_hdr *lo;

	spin_lock(&ino->i_lock);
	lo = NFS_I(ino)->layout;
	lo->plh_block_lgets--;
	put_layout_hdr_locked(lo);
	spin_unlock(&ino->i_lock);
}

void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
{
	struct pnfs_layout_hdr *lo;

	spin_lock(&ino->i_lock);
	lo = NFS_I(ino)->layout;
	if ((int)(barrier - lo->plh_barrier) > 0)
		lo->plh_barrier = barrier;
	spin_unlock(&ino->i_lock);
}

bool pnfs_roc_drain(struct inode *ino, u32 *barrier)
{
	struct nfs_inode *nfsi = NFS_I(ino);
	struct pnfs_layout_segment *lseg;
	bool found = false;

	spin_lock(&ino->i_lock);
	list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list)
		if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
			found = true;
			break;
		}
	if (!found) {
		struct pnfs_layout_hdr *lo = nfsi->layout;
		u32 current_seqid = be32_to_cpu(lo->plh_stateid.stateid.seqid);

		/* Since close does not return a layout stateid for use as
		 * a barrier, we choose the worst-case barrier.
		 */
		*barrier = current_seqid + atomic_read(&lo->plh_outstanding);
	}
	spin_unlock(&ino->i_lock);
	return found;
}

/*
 * Compare two layout segments for sorting into layout cache.
 * We want to preferentially return RW over RO layouts, so ensure those
 * are seen first.
 */
static s64
cmp_layout(struct pnfs_layout_range *l1,
	   struct pnfs_layout_range *l2)
{
	s64 d;

	/* high offset > low offset */
	d = l1->offset - l2->offset;
	if (d)
		return d;

	/* short length > long length */
	d = l2->length - l1->length;
	if (d)
		return d;

	/* read > read/write */
	return (int)(l1->iomode == IOMODE_READ) - (int)(l2->iomode == IOMODE_READ);
}
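
/*
 * The resulting sort order in the layout cache, informally: segments are
 * kept in ascending offset order; for equal offsets the longer segment
 * comes first; and for identical ranges an IOMODE_RW segment sorts ahead
 * of an IOMODE_READ one, so pnfs_find_lseg() walks past RW layouts
 * before RO ones.
 */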

static void
pnfs_insert_layout(struct pnfs_layout_hdr *lo,
		   struct pnfs_layout_segment *lseg)
{
	struct pnfs_layout_segment *lp;

	dprintk("%s:Begin\n", __func__);

	assert_spin_locked(&lo->plh_inode->i_lock);
	list_for_each_entry(lp, &lo->plh_segs, pls_list) {
		if (cmp_layout(&lseg->pls_range, &lp->pls_range) > 0)
			continue;
		list_add_tail(&lseg->pls_list, &lp->pls_list);
		dprintk("%s: inserted lseg %p "
			"iomode %d offset %llu length %llu before "
			"lp %p iomode %d offset %llu length %llu\n",
			__func__, lseg, lseg->pls_range.iomode,
			lseg->pls_range.offset, lseg->pls_range.length,
			lp, lp->pls_range.iomode, lp->pls_range.offset,
			lp->pls_range.length);
		goto out;
	}
	list_add_tail(&lseg->pls_list, &lo->plh_segs);
	dprintk("%s: inserted lseg %p "
		"iomode %d offset %llu length %llu at tail\n",
		__func__, lseg, lseg->pls_range.iomode,
		lseg->pls_range.offset, lseg->pls_range.length);
out:
	get_layout_hdr(lo);

	dprintk("%s:Return\n", __func__);
}
static struct pnfs_layout_hdr *
alloc_init_layout_hdr(struct inode *ino, gfp_t gfp_flags)
{
	struct pnfs_layout_hdr *lo;

	lo = pnfs_alloc_layout_hdr(ino, gfp_flags);
	if (!lo)
		return NULL;
	atomic_set(&lo->plh_refcount, 1);
	INIT_LIST_HEAD(&lo->plh_layouts);
	INIT_LIST_HEAD(&lo->plh_segs);
	INIT_LIST_HEAD(&lo->plh_bulk_recall);
	lo->plh_inode = ino;
	return lo;
}

static struct pnfs_layout_hdr *
pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags)
{
	struct nfs_inode *nfsi = NFS_I(ino);
	struct pnfs_layout_hdr *new = NULL;

	dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);

	assert_spin_locked(&ino->i_lock);
	if (nfsi->layout) {
		if (test_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags))
			return NULL;
		else
			return nfsi->layout;
	}
	spin_unlock(&ino->i_lock);
	new = alloc_init_layout_hdr(ino, gfp_flags);
	spin_lock(&ino->i_lock);

	if (likely(nfsi->layout == NULL))	/* Won the race? */
		nfsi->layout = new;
	else
		pnfs_free_layout_hdr(new);
	return nfsi->layout;
}
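
/*
 * Note on the locking dance in pnfs_find_alloc_layout(): i_lock is
 * dropped around the allocation, which may sleep, so another task can
 * install nfsi->layout first.  In that case the freshly allocated header
 * loses the race and is freed, and the winner's header is returned
 * instead.
 */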

/*
 * iomode matching rules:
 * iomode	lseg	match
 * -----	-----	-----
 * ANY		READ	true
 * ANY		RW	true
 * RW		READ	false
 * RW		RW	true
 * READ		READ	true
 * READ		RW	true
 */
static int
is_matching_lseg(struct pnfs_layout_range *ls_range,
		 struct pnfs_layout_range *range)
{
	struct pnfs_layout_range range1;

	if ((range->iomode == IOMODE_RW &&
	     ls_range->iomode != IOMODE_RW) ||
	    !lo_seg_intersecting(ls_range, range))
		return 0;

	/* range1 covers only the first byte in the range */
	range1 = *range;
	range1.length = 1;
	return lo_seg_contained(ls_range, &range1);
}
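
/*
 * Example of the matching rules: a cached IOMODE_RW lseg covering
 * [0, 16384) satisfies a READ request for [8192, 65536) because the
 * iomode is compatible and only the first requested byte (offset 8192)
 * has to fall inside the segment (range1 above); whether the rest of the
 * range is covered is left to the caller.
 */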

/*
 * lookup range in layout
 */
static struct pnfs_layout_segment *
pnfs_find_lseg(struct pnfs_layout_hdr *lo,
	       struct pnfs_layout_range *range)
{
	struct pnfs_layout_segment *lseg, *ret = NULL;

	dprintk("%s:Begin\n", __func__);

	assert_spin_locked(&lo->plh_inode->i_lock);
	list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
		if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
		    is_matching_lseg(&lseg->pls_range, range)) {
			ret = get_lseg(lseg);
			break;
		}
		if (cmp_layout(range, &lseg->pls_range) > 0)
			break;
	}

	dprintk("%s:Return lseg %p ref %d\n",
		__func__, ret, ret ? atomic_read(&ret->pls_refcount) : 0);
	return ret;
}

/*
 * Layout segment is retrieved from the server if not cached.
 * The appropriate layout segment is referenced and returned to the caller.
 */
struct pnfs_layout_segment *
pnfs_update_layout(struct inode *ino,
		   struct nfs_open_context *ctx,
		   loff_t pos,
		   u64 count,
		   enum pnfs_iomode iomode,
		   gfp_t gfp_flags)
{
	struct pnfs_layout_range arg = {
		.iomode = iomode,
		.offset = pos,
		.length = count,
	};
	unsigned pg_offset;
	struct nfs_inode *nfsi = NFS_I(ino);
	struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
	struct pnfs_layout_hdr *lo;
	struct pnfs_layout_segment *lseg = NULL;
	bool first = false;

	if (!pnfs_enabled_sb(NFS_SERVER(ino)))
		return NULL;
	spin_lock(&ino->i_lock);
	lo = pnfs_find_alloc_layout(ino, gfp_flags);
	if (lo == NULL) {
		dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__);
		goto out_unlock;
	}

	/* Do we even need to bother with this? */
	if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
		dprintk("%s matches recall, use MDS\n", __func__);
		goto out_unlock;
	}

	/* if LAYOUTGET already failed once we don't try again */
	if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags))
		goto out_unlock;

	/* Check to see if the layout for the given range already exists */
	lseg = pnfs_find_lseg(lo, &arg);
	if (lseg)
		goto out_unlock;

	if (pnfs_layoutgets_blocked(lo, NULL, 0))
		goto out_unlock;
	atomic_inc(&lo->plh_outstanding);

	get_layout_hdr(lo);
	if (list_empty(&lo->plh_segs))
		first = true;
	spin_unlock(&ino->i_lock);
	if (first) {
		/* The lo must be on the clp list if there is any
		 * chance of a CB_LAYOUTRECALL(FILE) coming in.
		 */
		spin_lock(&clp->cl_lock);
		BUG_ON(!list_empty(&lo->plh_layouts));
		list_add_tail(&lo->plh_layouts, &clp->cl_layouts);
		spin_unlock(&clp->cl_lock);
	}

	pg_offset = arg.offset & ~PAGE_CACHE_MASK;
	if (pg_offset) {
		arg.offset -= pg_offset;
		arg.length += pg_offset;
	}
	arg.length = PAGE_CACHE_ALIGN(arg.length);

	lseg = send_layoutget(lo, ctx, &arg, gfp_flags);
	if (!lseg && first) {
		spin_lock(&clp->cl_lock);
		list_del_init(&lo->plh_layouts);
		spin_unlock(&clp->cl_lock);
	}
	atomic_dec(&lo->plh_outstanding);
	put_layout_hdr(lo);
out:
	dprintk("%s end, state 0x%lx lseg %p\n", __func__,
		nfsi->layout ? nfsi->layout->plh_flags : -1, lseg);
	return lseg;
out_unlock:
	spin_unlock(&ino->i_lock);
	goto out;
}

int
pnfs_layout_process(struct nfs4_layoutget *lgp)
{
	struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
	struct nfs4_layoutget_res *res = &lgp->res;
	struct pnfs_layout_segment *lseg;
	struct inode *ino = lo->plh_inode;
	struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
	int status = 0;

	/* Inject layout blob into I/O device driver */
	lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags);
	if (!lseg || IS_ERR(lseg)) {
		if (!lseg)
			status = -ENOMEM;
		else
			status = PTR_ERR(lseg);
		dprintk("%s: Could not allocate layout: error %d\n",
			__func__, status);
		goto out;
	}

	spin_lock(&ino->i_lock);
	if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
		dprintk("%s forget reply due to recall\n", __func__);
		goto out_forget_reply;
	}

	if (pnfs_layoutgets_blocked(lo, &res->stateid, 1)) {
		dprintk("%s forget reply due to state\n", __func__);
		goto out_forget_reply;
	}
	init_lseg(lo, lseg);
	lseg->pls_range = res->range;
	*lgp->lsegpp = get_lseg(lseg);
	pnfs_insert_layout(lo, lseg);

	if (res->return_on_close) {
		set_bit(NFS_LSEG_ROC, &lseg->pls_flags);
		set_bit(NFS_LAYOUT_ROC, &lo->plh_flags);
	}

	/* Done processing layoutget. Set the layout stateid */
	pnfs_set_layout_stateid(lo, &res->stateid, false);
	spin_unlock(&ino->i_lock);
out:
	return status;

out_forget_reply:
	spin_unlock(&ino->i_lock);
	lseg->pls_layout = lo;
	NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
	goto out;
}

static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio,
			     struct nfs_page *prev,
			     struct nfs_page *req)
{
	if (pgio->pg_count == prev->wb_bytes) {
		/* This is first coalesce call for a series of nfs_pages */
		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
						   prev->wb_context,
						   req_offset(req),
						   pgio->pg_count,
						   IOMODE_READ,
						   GFP_KERNEL);
	} else if (pgio->pg_lseg &&
		   req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset,
						pgio->pg_lseg->pls_range.length))
		return 0;

	return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
}

void
pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
{
	struct pnfs_layoutdriver_type *ld;

	ld = NFS_SERVER(inode)->pnfs_curr_ld;
	pgio->pg_test = (ld && ld->pg_test) ? pnfs_read_pg_test : NULL;
}

static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio,
			      struct nfs_page *prev,
			      struct nfs_page *req)
{
	if (pgio->pg_count == prev->wb_bytes) {
		/* This is first coalesce call for a series of nfs_pages */
		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
						   prev->wb_context,
						   req_offset(req),
						   pgio->pg_count,
						   IOMODE_RW,
						   GFP_NOFS);
	} else if (pgio->pg_lseg &&
		   req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset,
						pgio->pg_lseg->pls_range.length))
		return 0;

	return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
}

void
pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode)
{
	struct pnfs_layoutdriver_type *ld;

	ld = NFS_SERVER(inode)->pnfs_curr_ld;
	pgio->pg_test = (ld && ld->pg_test) ? pnfs_write_pg_test : NULL;
}
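
/*
 * Both pg_test helpers above follow the same pattern: on the first
 * coalesce call of a series (pg_count still equals the first request's
 * byte count) a layout segment is looked up or requested for the
 * appropriate iomode, a request starting beyond the end of the cached
 * lseg refuses to coalesce so a new series is started, and the layout
 * driver's own pg_test gets the final say.
 */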

/*
 * Called by non rpc-based layout drivers
 */
int
pnfs_ld_write_done(struct nfs_write_data *data)
{
	int status;

	if (!data->pnfs_error) {
		pnfs_set_layoutcommit(data);
		data->mds_ops->rpc_call_done(&data->task, data);
		data->mds_ops->rpc_release(data);
		return 0;
	}

	dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__,
		data->pnfs_error);
	status = nfs_initiate_write(data, NFS_CLIENT(data->inode),
				    data->mds_ops, NFS_FILE_SYNC);
	return status ? : -EAGAIN;
}
EXPORT_SYMBOL_GPL(pnfs_ld_write_done);

enum pnfs_try_status
pnfs_try_to_write_data(struct nfs_write_data *wdata,
		       const struct rpc_call_ops *call_ops, int how)
{
	struct inode *inode = wdata->inode;
	enum pnfs_try_status trypnfs;
	struct nfs_server *nfss = NFS_SERVER(inode);

	wdata->mds_ops = call_ops;

	dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
		inode->i_ino, wdata->args.count, wdata->args.offset, how);

	trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how);
	if (trypnfs == PNFS_NOT_ATTEMPTED) {
		put_lseg(wdata->lseg);
		wdata->lseg = NULL;
	} else
		nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);

	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
	return trypnfs;
}

/*
 * Called by non rpc-based layout drivers
 */
int
pnfs_ld_read_done(struct nfs_read_data *data)
{
	int status;

	if (!data->pnfs_error) {
		__nfs4_read_done_cb(data);
		data->mds_ops->rpc_call_done(&data->task, data);
		data->mds_ops->rpc_release(data);
		return 0;
	}

	dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__,
		data->pnfs_error);
	status = nfs_initiate_read(data, NFS_CLIENT(data->inode),
				   data->mds_ops);
	return status ? : -EAGAIN;
}
EXPORT_SYMBOL_GPL(pnfs_ld_read_done);

/*
 * Call the appropriate parallel I/O subsystem read function.
 */
enum pnfs_try_status
pnfs_try_to_read_data(struct nfs_read_data *rdata,
		      const struct rpc_call_ops *call_ops)
{
	struct inode *inode = rdata->inode;
	struct nfs_server *nfss = NFS_SERVER(inode);
	enum pnfs_try_status trypnfs;

	rdata->mds_ops = call_ops;

	dprintk("%s: Reading ino:%lu %u@%llu\n",
		__func__, inode->i_ino, rdata->args.count, rdata->args.offset);

	trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata);
	if (trypnfs == PNFS_NOT_ATTEMPTED) {
		put_lseg(rdata->lseg);
		rdata->lseg = NULL;
	} else {
		nfs_inc_stats(inode, NFSIOS_PNFS_READ);
	}
	dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
	return trypnfs;
}

/*
 * Currently there is only one (whole file) write lseg.
 */
static struct pnfs_layout_segment *pnfs_list_write_lseg(struct inode *inode)
{
	struct pnfs_layout_segment *lseg, *rv = NULL;

	list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
		if (lseg->pls_range.iomode == IOMODE_RW)
			rv = lseg;
	return rv;
}

void
pnfs_set_layoutcommit(struct nfs_write_data *wdata)
{
	struct nfs_inode *nfsi = NFS_I(wdata->inode);
	loff_t end_pos = wdata->args.offset + wdata->res.count;
	bool mark_as_dirty = false;

	spin_lock(&nfsi->vfs_inode.i_lock);
	if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
		/* references matched in nfs4_layoutcommit_release */
		get_lseg(wdata->lseg);
		wdata->lseg->pls_lc_cred =
			get_rpccred(wdata->args.context->state->owner->so_cred);
		mark_as_dirty = true;
		dprintk("%s: Set layoutcommit for inode %lu ",
			__func__, wdata->inode->i_ino);
	}
	if (end_pos > wdata->lseg->pls_end_pos)
		wdata->lseg->pls_end_pos = end_pos;
	spin_unlock(&nfsi->vfs_inode.i_lock);

	/* if pnfs_layoutcommit_inode() runs between inode locks, the next one
	 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
	if (mark_as_dirty)
		mark_inode_dirty_sync(wdata->inode);
}
EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);

/*
 * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and
 * NFS_UNSTABLE WRITEs with a COMMIT to data servers must store enough
 * data to disk to allow the server to recover the data if it crashes.
 * LAYOUTCOMMIT is only needed when the NFL4_UFLG_COMMIT_THRU_MDS flag
 * is off, and a COMMIT is sent to a data server, or
 * if WRITEs to a data server return NFS_DATA_SYNC.
 */
int
pnfs_layoutcommit_inode(struct inode *inode, bool sync)
{
	struct nfs4_layoutcommit_data *data;
	struct nfs_inode *nfsi = NFS_I(inode);
	struct pnfs_layout_segment *lseg;
	struct rpc_cred *cred;
	loff_t end_pos;
	int status = 0;

	dprintk("--> %s inode %lu\n", __func__, inode->i_ino);

	if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
		return 0;

	/* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
	data = kzalloc(sizeof(*data), GFP_NOFS);
	if (!data) {
		mark_inode_dirty_sync(inode);
		status = -ENOMEM;
		goto out;
	}

	spin_lock(&inode->i_lock);
	if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
		spin_unlock(&inode->i_lock);
		kfree(data);
		goto out;
	}
	/*
	 * Currently only one (whole file) write lseg which is referenced
	 * in pnfs_set_layoutcommit and will be found.
	 */
	lseg = pnfs_list_write_lseg(inode);

	end_pos = lseg->pls_end_pos;
	cred = lseg->pls_lc_cred;
	lseg->pls_end_pos = 0;
	lseg->pls_lc_cred = NULL;

	memcpy(&data->args.stateid.data, nfsi->layout->plh_stateid.data,
		sizeof(nfsi->layout->plh_stateid.data));
	spin_unlock(&inode->i_lock);

	data->args.inode = inode;
	data->lseg = lseg;
	data->cred = cred;
	nfs_fattr_init(&data->fattr);
	data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;
	data->res.fattr = &data->fattr;
	data->args.lastbytewritten = end_pos - 1;
	data->res.server = NFS_SERVER(inode);

	status = nfs4_proc_layoutcommit(data, sync);
out:
	dprintk("<-- %s status %d\n", __func__, status);
	return status;
}