2010-10-20 08:18:00 +04:00
/*
* Module for the pnfs nfs4 file layout driver .
* Defines all I / O and Policy interface operations , plus code
* to register itself with the pNFS client .
*
* Copyright ( c ) 2002
* The Regents of the University of Michigan
* All Rights Reserved
*
* Dean Hildebrand < dhildebz @ umich . edu >
*
* Permission is granted to use , copy , create derivative works , and
* redistribute this software and such derivative works for any purpose ,
* so long as the name of the University of Michigan is not used in
* any advertising or publicity pertaining to the use or distribution
* of this software without specific , written prior authorization . If
* the above copyright notice or any other identification of the
* University of Michigan is included in any copy of any portion of
* this software , then the disclaimer below must also be included .
*
* This software is provided as is , without representation or warranty
* of any kind either express or implied , including without limitation
* the implied warranties of merchantability , fitness for a particular
* purpose , or noninfringement . The Regents of the University of
* Michigan shall not be liable for any damages , including special ,
* indirect , incidental , or consequential damages , with respect to any
* claim arising out of or in connection with the use of the software ,
* even if it has been or is hereafter advised of the possibility of
* such damages .
*/
# include <linux/nfs_fs.h>
2011-06-20 02:33:46 +04:00
# include <linux/nfs_page.h>
2011-07-01 22:23:34 +04:00
# include <linux/module.h>
2010-10-20 08:18:04 +04:00
2012-02-17 22:15:24 +04:00
# include <linux/sunrpc/metrics.h>
2010-10-20 08:18:04 +04:00
# include "internal.h"
2012-03-07 19:49:41 +04:00
# include "delegation.h"
2010-10-20 08:18:04 +04:00
# include "nfs4filelayout.h"
2010-10-20 08:18:00 +04:00
/*
 * NOTE(review): presumably selects the debug facility that dprintk() calls
 * in this file are filed under -- confirm against the NFS debug headers.
 */
# define NFSDBG_FACILITY NFSDBG_PNFS_LD
/* Module metadata. */
MODULE_LICENSE ( " GPL " ) ;
MODULE_AUTHOR ( " Dean Hildebrand <dhildebz@umich.edu> " ) ;
MODULE_DESCRIPTION ( " The NFSv4 file layout driver " ) ;
2011-03-01 04:34:20 +03:00
/*
 * Delay handed to rpc_delay() before a data server request is retried after
 * NFS4ERR_DELAY, NFS4ERR_GRACE or EKEYEXPIRED.
 */
# define FILELAYOUT_POLL_RETRY_MAX (15*HZ)
2011-03-01 04:34:18 +03:00
/*
 * Map a file offset to the offset used on a data server for a densely
 * packed layout.
 *
 * The offset is first made relative to the layout's pattern_offset.  Every
 * complete stripe (stripe_unit * stripe_count bytes) in front of it accounts
 * for one stripe unit in the result; the remainder of the offset within its
 * stripe unit is then added on top.
 */
static loff_t
filelayout_get_dense_offset(struct nfs4_filelayout_segment *flseg,
			    loff_t offset)
{
	u32 stripe_width = flseg->stripe_unit * flseg->dsaddr->stripe_count;
	u32 within_unit;
	u64 full_stripes;

	offset -= flseg->pattern_offset;

	/* Position inside the current stripe unit. */
	div_u64_rem(offset, flseg->stripe_unit, &within_unit);
	/* Number of whole stripes that precede the offset. */
	full_stripes = div_u64(offset, stripe_width);

	return full_stripes * flseg->stripe_unit + within_unit;
}
/*
 * Compute the offset to use on the data server for a given file offset.
 *
 * STRIPE_SPARSE layouts use the file offset unchanged; STRIPE_DENSE layouts
 * are translated by filelayout_get_dense_offset().  Any other stripe type
 * hits BUG().
 */
static loff_t
filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
{
	struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);

	if (flseg->stripe_type == STRIPE_SPARSE)
		return offset;
	if (flseg->stripe_type == STRIPE_DENSE)
		return filelayout_get_dense_offset(flseg, offset);

	BUG();
}
2012-04-28 01:53:46 +04:00
static void filelayout_reset_write ( struct nfs_write_data * data )
{
struct nfs_pgio_header * hdr = data - > header ;
struct rpc_task * task = & data - > task ;
if ( ! test_and_set_bit ( NFS_IOHDR_REDO , & hdr - > flags ) ) {
dprintk ( " %s Reset task %5u for i/o through MDS "
" (req %s/%lld, %u bytes @ offset %llu) \n " , __func__ ,
data - > task . tk_pid ,
2012-05-22 18:10:03 +04:00
hdr - > inode - > i_sb - > s_id ,
( long long ) NFS_FILEID ( hdr - > inode ) ,
2012-04-28 01:53:46 +04:00
data - > args . count ,
( unsigned long long ) data - > args . offset ) ;
task - > tk_status = pnfs_write_done_resend_to_mds ( hdr - > inode ,
& hdr - > pages ,
hdr - > completion_ops ) ;
}
}
static void filelayout_reset_read ( struct nfs_read_data * data )
{
struct nfs_pgio_header * hdr = data - > header ;
struct rpc_task * task = & data - > task ;
if ( ! test_and_set_bit ( NFS_IOHDR_REDO , & hdr - > flags ) ) {
dprintk ( " %s Reset task %5u for i/o through MDS "
" (req %s/%lld, %u bytes @ offset %llu) \n " , __func__ ,
data - > task . tk_pid ,
2012-05-22 18:10:03 +04:00
hdr - > inode - > i_sb - > s_id ,
( long long ) NFS_FILEID ( hdr - > inode ) ,
2012-04-28 01:53:46 +04:00
data - > args . count ,
( unsigned long long ) data - > args . offset ) ;
task - > tk_status = pnfs_read_done_resend_to_mds ( hdr - > inode ,
& hdr - > pages ,
hdr - > completion_ops ) ;
}
}
2011-03-01 04:34:20 +03:00
/*
 * Classify the status of a completed data server RPC and start whatever
 * recovery it calls for.
 *
 * @task:  the RPC task whose tk_status is examined
 * @state: open state associated with the I/O; may be NULL (the commit
 *         completion callback in this file passes NULL)
 * @clp:   nfs_client of the data server the RPC was sent to
 * @lseg:  layout segment the I/O was issued under
 *
 * Returns 0 when tk_status is non-negative, -NFS4ERR_RESET_TO_MDS when the
 * I/O should be redone through the MDS, and otherwise clears tk_status and
 * returns -EAGAIN so that the caller restarts the call.
 */
static int filelayout_async_handle_error ( struct rpc_task * task ,
struct nfs4_state * state ,
struct nfs_client * clp ,
2012-04-28 01:53:46 +04:00
struct pnfs_layout_segment * lseg )
2011-03-01 04:34:20 +03:00
{
2012-04-28 01:53:46 +04:00
struct inode * inode = lseg - > pls_layout - > plh_inode ;
struct nfs_server * mds_server = NFS_SERVER ( inode ) ;
struct nfs4_deviceid_node * devid = FILELAYOUT_DEVID_NODE ( lseg ) ;
2012-03-07 19:49:41 +04:00
struct nfs_client * mds_client = mds_server - > nfs_client ;
2012-04-28 01:53:49 +04:00
/* Slot table of the data server session; its wait queue is woken below. */
struct nfs4_slot_table * tbl = & clp - > cl_session - > fc_slot_table ;
2012-03-07 19:49:41 +04:00
2011-03-01 04:34:20 +03:00
/* Nothing to handle when the RPC did not fail. */
if ( task - > tk_status > = 0 )
return 0 ;
switch ( task - > tk_status ) {
2012-03-07 19:49:41 +04:00
/* MDS state errors */
case - NFS4ERR_DELEG_REVOKED :
case - NFS4ERR_ADMIN_REVOKED :
case - NFS4ERR_BAD_STATEID :
2012-04-28 01:53:46 +04:00
/* Without open state there is nothing to recover: just retry (-EAGAIN). */
if ( state = = NULL )
break ;
2012-03-08 20:03:53 +04:00
nfs_remove_bad_delegation ( state - > inode ) ;
/* fall through: stateid recovery is scheduled by the next case */
2012-03-07 19:49:41 +04:00
case - NFS4ERR_OPENMODE :
2012-04-28 01:53:46 +04:00
if ( state = = NULL )
break ;
2012-03-07 19:49:41 +04:00
nfs4_schedule_stateid_recovery ( mds_server , state ) ;
goto wait_on_recovery ;
case - NFS4ERR_EXPIRED :
2012-04-28 01:53:46 +04:00
/* Lease recovery is scheduled even when no open state was supplied. */
if ( state ! = NULL )
nfs4_schedule_stateid_recovery ( mds_server , state ) ;
2012-03-07 19:49:41 +04:00
nfs4_schedule_lease_recovery ( mds_client ) ;
goto wait_on_recovery ;
/* DS session errors */
2011-03-01 04:34:20 +03:00
case - NFS4ERR_BADSESSION :
case - NFS4ERR_BADSLOT :
case - NFS4ERR_BAD_HIGH_SLOT :
case - NFS4ERR_DEADSESSION :
case - NFS4ERR_CONN_NOT_BOUND_TO_SESSION :
case - NFS4ERR_SEQ_FALSE_RETRY :
case - NFS4ERR_SEQ_MISORDERED :
dprintk ( " %s ERROR %d, Reset session. Exchangeid "
" flags 0x%x \n " , __func__ , task - > tk_status ,
clp - > cl_exchange_flags ) ;
2012-05-27 21:02:53 +04:00
/* Recover the data server's session, then retry via the -EAGAIN path. */
nfs4_schedule_session_recovery ( clp - > cl_session , task - > tk_status ) ;
2011-03-01 04:34:20 +03:00
break ;
case - NFS4ERR_DELAY :
case - NFS4ERR_GRACE :
case - EKEYEXPIRED :
/* Back off before the retry. */
rpc_delay ( task , FILELAYOUT_POLL_RETRY_MAX ) ;
break ;
2011-05-03 21:43:03 +04:00
case - NFS4ERR_RETRY_UNCACHED_REP :
/* No extra action: falls out of the switch to the -EAGAIN path. */
break ;
2012-04-28 01:53:53 +04:00
/* Invalidate Layout errors */
case - NFS4ERR_PNFS_NO_LAYOUT :
case - ESTALE : /* mapped NFS4ERR_STALE */
case - EBADHANDLE : /* mapped NFS4ERR_BADHANDLE */
case - EISDIR : /* mapped NFS4ERR_ISDIR */
case - NFS4ERR_FHEXPIRED :
case - NFS4ERR_WRONG_TYPE :
dprintk ( " %s Invalid layout error %d \n " , __func__ ,
task - > tk_status ) ;
/*
* Destroy layout so new i / o will get a new layout .
* Layout will not be destroyed until all current lseg
* references are put . Mark layout as invalid to resend failed
* i / o and all i / o waiting on the slot table to the MDS until
* layout is destroyed and a new valid layout is obtained .
*/
set_bit ( NFS_LAYOUT_INVALID ,
2012-05-22 16:09:28 +04:00
& NFS_I ( inode ) - > layout - > plh_flags ) ;
pnfs_destroy_layout ( NFS_I ( inode ) ) ;
2012-04-28 01:53:53 +04:00
rpc_wake_up ( & tbl - > slot_tbl_waitq ) ;
/* Jumps to the label inside the default case below. */
goto reset ;
2012-04-28 01:53:46 +04:00
/* RPC connection errors */
case - ECONNREFUSED :
case - EHOSTDOWN :
case - EHOSTUNREACH :
case - ENETUNREACH :
case - EIO :
case - ETIMEDOUT :
case - EPIPE :
dprintk ( " %s DS connection error %d \n " , __func__ ,
task - > tk_status ) ;
/* Mark the device invalid, return the layout, wake slot waiters, disconnect. */
filelayout_mark_devid_invalid ( devid ) ;
2012-06-20 23:03:32 +04:00
clear_bit ( NFS_INO_LAYOUTCOMMIT , & NFS_I ( inode ) - > flags ) ;
2012-06-20 23:03:31 +04:00
_pnfs_return_layout ( inode ) ;
2012-04-28 01:53:49 +04:00
rpc_wake_up ( & tbl - > slot_tbl_waitq ) ;
2012-04-28 01:53:52 +04:00
nfs4_ds_disconnect ( clp ) ;
2012-04-28 01:53:46 +04:00
/* fall through */
2011-03-01 04:34:20 +03:00
default :
2012-04-28 01:53:53 +04:00
reset :
2012-04-28 01:53:46 +04:00
dprintk ( " %s Retry through MDS. Error %d \n " , __func__ ,
2011-03-01 04:34:20 +03:00
task - > tk_status ) ;
2012-04-28 01:53:46 +04:00
/* tk_status is left untouched on this path. */
return - NFS4ERR_RESET_TO_MDS ;
2011-03-01 04:34:20 +03:00
}
2012-03-07 19:49:41 +04:00
out :
2011-03-01 04:34:20 +03:00
/* Retry path: clear the error and ask the caller to restart the call. */
task - > tk_status = 0 ;
return - EAGAIN ;
2012-03-07 19:49:41 +04:00
wait_on_recovery :
/*
 * Queue the task on the MDS client's wait queue; if the
 * NFS4CLNT_MANAGER_RUNNING bit is not set, wake it again right away.
 */
rpc_sleep_on ( & mds_client - > cl_rpcwaitq , task , NULL ) ;
if ( test_bit ( NFS4CLNT_MANAGER_RUNNING , & mds_client - > cl_state ) = = 0 )
rpc_wake_up_queued_task ( & mds_client - > cl_rpcwaitq , task ) ;
goto out ;
2011-03-01 04:34:20 +03:00
}
/* NFS_PROTO call done callback routines */
static int filelayout_read_done_cb ( struct rpc_task * task ,
struct nfs_read_data * data )
{
2012-04-28 01:53:46 +04:00
struct nfs_pgio_header * hdr = data - > header ;
int err ;
2011-03-01 04:34:20 +03:00
2012-04-28 01:53:46 +04:00
err = filelayout_async_handle_error ( task , data - > args . context - > state ,
data - > ds_clp , hdr - > lseg ) ;
2011-03-01 04:34:20 +03:00
2012-04-28 01:53:46 +04:00
switch ( err ) {
case - NFS4ERR_RESET_TO_MDS :
filelayout_reset_read ( data ) ;
return task - > tk_status ;
case - EAGAIN :
2011-10-19 23:17:29 +04:00
rpc_restart_call_prepare ( task ) ;
2011-03-01 04:34:20 +03:00
return - EAGAIN ;
}
return 0 ;
}
2011-03-23 16:27:54 +03:00
/*
* We reference the rpc_cred of the first WRITE that triggers the need for
* a LAYOUTCOMMIT , and use it to send the layoutcommit compound .
* rfc5661 is not clear about which credential should be used .
*/
static void
filelayout_set_layoutcommit ( struct nfs_write_data * wdata )
{
2012-04-20 22:47:44 +04:00
struct nfs_pgio_header * hdr = wdata - > header ;
if ( FILELAYOUT_LSEG ( hdr - > lseg ) - > commit_through_mds | |
2011-03-23 16:27:54 +03:00
wdata - > res . verf - > committed = = NFS_FILE_SYNC )
return ;
pnfs_set_layoutcommit ( wdata ) ;
2012-04-20 22:47:44 +04:00
dprintk ( " %s ionde %lu pls_end_pos %lu \n " , __func__ , hdr - > inode - > i_ino ,
( unsigned long ) NFS_I ( hdr - > inode ) - > layout - > plh_lwb ) ;
2011-03-23 16:27:54 +03:00
}
2011-03-01 04:34:19 +03:00
/*
* Call ops for the async read / write cases
* In the case of dense layouts , the offset needs to be reset to its
* original value .
*/
static void filelayout_read_prepare ( struct rpc_task * task , void * data )
{
2012-04-20 22:47:42 +04:00
struct nfs_read_data * rdata = data ;
2011-03-01 04:34:19 +03:00
2012-04-28 01:53:53 +04:00
if ( filelayout_reset_to_mds ( rdata - > header - > lseg ) ) {
2012-04-28 01:53:48 +04:00
dprintk ( " %s task %u reset io to MDS \n " , __func__ , task - > tk_pid ) ;
filelayout_reset_read ( rdata ) ;
rpc_exit ( task , 0 ) ;
return ;
}
2011-03-01 04:34:20 +03:00
rdata - > read_done_cb = filelayout_read_done_cb ;
2011-03-01 04:34:19 +03:00
if ( nfs41_setup_sequence ( rdata - > ds_clp - > cl_session ,
& rdata - > args . seq_args , & rdata - > res . seq_res ,
2012-01-18 07:04:25 +04:00
task ) )
2011-03-01 04:34:19 +03:00
return ;
rpc_call_start ( task ) ;
}
static void filelayout_read_call_done ( struct rpc_task * task , void * data )
{
2012-04-20 22:47:42 +04:00
struct nfs_read_data * rdata = data ;
2011-03-01 04:34:19 +03:00
dprintk ( " --> %s task->tk_status %d \n " , __func__ , task - > tk_status ) ;
2012-05-22 16:09:27 +04:00
if ( test_bit ( NFS_IOHDR_REDO , & rdata - > header - > flags ) & &
task - > tk_status = = 0 )
2012-04-28 01:53:48 +04:00
return ;
2011-03-01 04:34:19 +03:00
/* Note this may cause RPC to be resent */
2012-04-20 22:47:44 +04:00
rdata - > header - > mds_ops - > rpc_call_done ( task , data ) ;
2011-03-01 04:34:19 +03:00
}
2012-02-17 22:15:24 +04:00
static void filelayout_read_count_stats ( struct rpc_task * task , void * data )
{
2012-04-20 22:47:42 +04:00
struct nfs_read_data * rdata = data ;
2012-02-17 22:15:24 +04:00
2012-04-20 22:47:44 +04:00
rpc_count_iostats ( task , NFS_SERVER ( rdata - > header - > inode ) - > client - > cl_metrics ) ;
2012-02-17 22:15:24 +04:00
}
2011-03-01 04:34:19 +03:00
static void filelayout_read_release ( void * data )
{
2012-04-20 22:47:42 +04:00
struct nfs_read_data * rdata = data ;
2011-03-01 04:34:19 +03:00
2012-05-22 16:09:26 +04:00
nfs_put_client ( rdata - > ds_clp ) ;
2012-04-20 22:47:44 +04:00
rdata - > header - > mds_ops - > rpc_release ( data ) ;
2011-03-01 04:34:19 +03:00
}
2011-03-03 18:13:47 +03:00
static int filelayout_write_done_cb ( struct rpc_task * task ,
struct nfs_write_data * data )
{
2012-04-28 01:53:46 +04:00
struct nfs_pgio_header * hdr = data - > header ;
int err ;
err = filelayout_async_handle_error ( task , data - > args . context - > state ,
data - > ds_clp , hdr - > lseg ) ;
switch ( err ) {
case - NFS4ERR_RESET_TO_MDS :
filelayout_reset_write ( data ) ;
return task - > tk_status ;
case - EAGAIN :
2011-10-19 23:17:29 +04:00
rpc_restart_call_prepare ( task ) ;
2011-03-03 18:13:47 +03:00
return - EAGAIN ;
}
2011-03-23 16:27:54 +03:00
filelayout_set_layoutcommit ( data ) ;
2011-03-03 18:13:47 +03:00
return 0 ;
}
2011-03-23 16:27:53 +03:00
/* Fake up some data that will cause nfs_commit_release to retry the writes. */
2012-04-20 22:47:39 +04:00
static void prepare_to_resend_writes ( struct nfs_commit_data * data )
2011-03-23 16:27:53 +03:00
{
struct nfs_page * first = nfs_list_entry ( data - > pages . next ) ;
data - > task . tk_status = 0 ;
2012-06-08 19:56:09 +04:00
memcpy ( & data - > verf . verifier , & first - > wb_verf ,
sizeof ( data - > verf . verifier ) ) ;
data - > verf . verifier . data [ 0 ] + + ; /* ensure verifier mismatch */
2011-03-23 16:27:53 +03:00
}
static int filelayout_commit_done_cb ( struct rpc_task * task ,
2012-04-20 22:47:39 +04:00
struct nfs_commit_data * data )
2011-03-23 16:27:53 +03:00
{
2012-04-28 01:53:46 +04:00
int err ;
err = filelayout_async_handle_error ( task , NULL , data - > ds_clp ,
data - > lseg ) ;
switch ( err ) {
case - NFS4ERR_RESET_TO_MDS :
prepare_to_resend_writes ( data ) ;
return - EAGAIN ;
case - EAGAIN :
rpc_restart_call_prepare ( task ) ;
2011-03-23 16:27:53 +03:00
return - EAGAIN ;
}
return 0 ;
}
2011-03-03 18:13:47 +03:00
static void filelayout_write_prepare ( struct rpc_task * task , void * data )
{
2012-04-20 22:47:42 +04:00
struct nfs_write_data * wdata = data ;
2011-03-03 18:13:47 +03:00
2012-04-28 01:53:53 +04:00
if ( filelayout_reset_to_mds ( wdata - > header - > lseg ) ) {
2012-04-28 01:53:48 +04:00
dprintk ( " %s task %u reset io to MDS \n " , __func__ , task - > tk_pid ) ;
filelayout_reset_write ( wdata ) ;
rpc_exit ( task , 0 ) ;
return ;
}
2011-03-03 18:13:47 +03:00
if ( nfs41_setup_sequence ( wdata - > ds_clp - > cl_session ,
& wdata - > args . seq_args , & wdata - > res . seq_res ,
2012-01-18 07:04:25 +04:00
task ) )
2011-03-03 18:13:47 +03:00
return ;
rpc_call_start ( task ) ;
}
static void filelayout_write_call_done ( struct rpc_task * task , void * data )
{
2012-04-20 22:47:42 +04:00
struct nfs_write_data * wdata = data ;
2011-03-03 18:13:47 +03:00
2012-05-22 16:09:27 +04:00
if ( test_bit ( NFS_IOHDR_REDO , & wdata - > header - > flags ) & &
task - > tk_status = = 0 )
2012-04-28 01:53:48 +04:00
return ;
2011-03-03 18:13:47 +03:00
/* Note this may cause RPC to be resent */
2012-04-20 22:47:44 +04:00
wdata - > header - > mds_ops - > rpc_call_done ( task , data ) ;
2011-03-03 18:13:47 +03:00
}
2012-02-17 22:15:24 +04:00
static void filelayout_write_count_stats ( struct rpc_task * task , void * data )
{
2012-04-20 22:47:42 +04:00
struct nfs_write_data * wdata = data ;
2012-02-17 22:15:24 +04:00
2012-04-20 22:47:44 +04:00
rpc_count_iostats ( task , NFS_SERVER ( wdata - > header - > inode ) - > client - > cl_metrics ) ;
2012-02-17 22:15:24 +04:00
}
2011-03-03 18:13:47 +03:00
static void filelayout_write_release ( void * data )
{
2012-04-20 22:47:42 +04:00
struct nfs_write_data * wdata = data ;
2011-03-03 18:13:47 +03:00
2012-05-22 16:09:26 +04:00
nfs_put_client ( wdata - > ds_clp ) ;
2012-04-20 22:47:44 +04:00
wdata - > header - > mds_ops - > rpc_release ( data ) ;
2011-03-03 18:13:47 +03:00
}
2012-04-20 22:47:39 +04:00
static void filelayout_commit_prepare ( struct rpc_task * task , void * data )
2011-03-23 16:27:53 +03:00
{
2012-04-20 22:47:39 +04:00
struct nfs_commit_data * wdata = data ;
2011-03-23 16:27:53 +03:00
2012-04-20 22:47:39 +04:00
if ( nfs41_setup_sequence ( wdata - > ds_clp - > cl_session ,
& wdata - > args . seq_args , & wdata - > res . seq_res ,
task ) )
return ;
rpc_call_start ( task ) ;
}
static void filelayout_write_commit_done ( struct rpc_task * task , void * data )
{
struct nfs_commit_data * wdata = data ;
/* Note this may cause RPC to be resent */
wdata - > mds_ops - > rpc_call_done ( task , data ) ;
}
static void filelayout_commit_count_stats ( struct rpc_task * task , void * data )
{
struct nfs_commit_data * cdata = data ;
rpc_count_iostats ( task , NFS_SERVER ( cdata - > inode ) - > client - > cl_metrics ) ;
}
static void filelayout_commit_release ( void * calldata )
{
struct nfs_commit_data * data = calldata ;
2012-04-20 22:47:54 +04:00
data - > completion_ops - > completion ( data ) ;
2012-09-19 04:57:08 +04:00
pnfs_put_lseg ( data - > lseg ) ;
2012-04-28 01:53:51 +04:00
nfs_put_client ( data - > ds_clp ) ;
2012-04-20 22:47:39 +04:00
nfs_commitdata_release ( data ) ;
2011-03-23 16:27:53 +03:00
}
2012-03-11 21:11:00 +04:00
static const struct rpc_call_ops filelayout_read_call_ops = {
2011-03-01 04:34:19 +03:00
. rpc_call_prepare = filelayout_read_prepare ,
. rpc_call_done = filelayout_read_call_done ,
2012-02-17 22:15:24 +04:00
. rpc_count_stats = filelayout_read_count_stats ,
2011-03-01 04:34:19 +03:00
. rpc_release = filelayout_read_release ,
} ;
2012-03-11 21:11:00 +04:00
static const struct rpc_call_ops filelayout_write_call_ops = {
2011-03-03 18:13:47 +03:00
. rpc_call_prepare = filelayout_write_prepare ,
. rpc_call_done = filelayout_write_call_done ,
2012-02-17 22:15:24 +04:00
. rpc_count_stats = filelayout_write_count_stats ,
2011-03-03 18:13:47 +03:00
. rpc_release = filelayout_write_release ,
} ;
2012-03-11 21:11:00 +04:00
static const struct rpc_call_ops filelayout_commit_call_ops = {
2012-04-20 22:47:39 +04:00
. rpc_call_prepare = filelayout_commit_prepare ,
. rpc_call_done = filelayout_write_commit_done ,
. rpc_count_stats = filelayout_commit_count_stats ,
2011-03-23 16:27:53 +03:00
. rpc_release = filelayout_commit_release ,
} ;
2011-03-01 04:34:19 +03:00
static enum pnfs_try_status
filelayout_read_pagelist ( struct nfs_read_data * data )
{
2012-04-20 22:47:44 +04:00
struct nfs_pgio_header * hdr = data - > header ;
struct pnfs_layout_segment * lseg = hdr - > lseg ;
2011-03-01 04:34:19 +03:00
struct nfs4_pnfs_ds * ds ;
loff_t offset = data - > args . offset ;
u32 j , idx ;
struct nfs_fh * fh ;
int status ;
dprintk ( " --> %s ino %lu pgbase %u req %Zu@%llu \n " ,
2012-04-20 22:47:44 +04:00
__func__ , hdr - > inode - > i_ino ,
2011-03-01 04:34:19 +03:00
data - > args . pgbase , ( size_t ) data - > args . count , offset ) ;
/* Retrieve the correct rpc_client for the byte range */
j = nfs4_fl_calc_j_index ( lseg , offset ) ;
idx = nfs4_fl_calc_ds_index ( lseg , j ) ;
ds = nfs4_fl_prepare_ds ( lseg , idx ) ;
2012-04-28 01:53:43 +04:00
if ( ! ds )
2011-03-01 04:34:19 +03:00
return PNFS_NOT_ATTEMPTED ;
2012-04-28 01:53:51 +04:00
dprintk ( " %s USE DS: %s cl_count %d \n " , __func__ ,
ds - > ds_remotestr , atomic_read ( & ds - > ds_clp - > cl_count ) ) ;
2011-03-01 04:34:19 +03:00
/* No multipath support. Use first DS */
2012-04-28 01:53:51 +04:00
atomic_inc ( & ds - > ds_clp - > cl_count ) ;
2011-03-01 04:34:19 +03:00
data - > ds_clp = ds - > ds_clp ;
fh = nfs4_fl_select_ds_fh ( lseg , j ) ;
if ( fh )
data - > args . fh = fh ;
data - > args . offset = filelayout_get_dserver_offset ( lseg , offset ) ;
data - > mds_offset = offset ;
/* Perform an asynchronous read to ds */
2012-04-20 22:47:41 +04:00
status = nfs_initiate_read ( ds - > ds_clp - > cl_rpcclient , data ,
2012-04-28 01:53:44 +04:00
& filelayout_read_call_ops , RPC_TASK_SOFTCONN ) ;
2011-03-01 04:34:19 +03:00
BUG_ON ( status ! = 0 ) ;
return PNFS_ATTEMPTED ;
}
2011-03-03 18:13:47 +03:00
/* Perform async writes. */
2011-03-03 18:13:45 +03:00
static enum pnfs_try_status
filelayout_write_pagelist ( struct nfs_write_data * data , int sync )
{
2012-04-20 22:47:44 +04:00
struct nfs_pgio_header * hdr = data - > header ;
struct pnfs_layout_segment * lseg = hdr - > lseg ;
2011-03-03 18:13:47 +03:00
struct nfs4_pnfs_ds * ds ;
loff_t offset = data - > args . offset ;
u32 j , idx ;
struct nfs_fh * fh ;
int status ;
/* Retrieve the correct rpc_client for the byte range */
j = nfs4_fl_calc_j_index ( lseg , offset ) ;
idx = nfs4_fl_calc_ds_index ( lseg , j ) ;
ds = nfs4_fl_prepare_ds ( lseg , idx ) ;
2012-04-28 01:53:43 +04:00
if ( ! ds )
2011-03-03 18:13:47 +03:00
return PNFS_NOT_ATTEMPTED ;
2012-04-28 01:53:51 +04:00
dprintk ( " %s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d \n " ,
__func__ , hdr - > inode - > i_ino , sync , ( size_t ) data - > args . count ,
offset , ds - > ds_remotestr , atomic_read ( & ds - > ds_clp - > cl_count ) ) ;
2011-03-03 18:13:47 +03:00
data - > write_done_cb = filelayout_write_done_cb ;
2012-04-28 01:53:51 +04:00
atomic_inc ( & ds - > ds_clp - > cl_count ) ;
2011-03-03 18:13:47 +03:00
data - > ds_clp = ds - > ds_clp ;
fh = nfs4_fl_select_ds_fh ( lseg , j ) ;
if ( fh )
data - > args . fh = fh ;
/*
* Get the file offset on the dserver . Set the write offset to
* this offset and save the original offset .
*/
data - > args . offset = filelayout_get_dserver_offset ( lseg , offset ) ;
/* Perform an asynchronous write */
2012-04-20 22:47:41 +04:00
status = nfs_initiate_write ( ds - > ds_clp - > cl_rpcclient , data ,
2012-04-28 01:53:44 +04:00
& filelayout_write_call_ops , sync ,
RPC_TASK_SOFTCONN ) ;
2011-03-03 18:13:47 +03:00
BUG_ON ( status ! = 0 ) ;
return PNFS_ATTEMPTED ;
2011-03-03 18:13:45 +03:00
}
2010-10-20 08:18:04 +04:00
/*
 * filelayout_check_layout()
 *
 * Make sure layout segment parameters are sane WRT the device.
 * At this point no generic layer initialization of the lseg has occurred,
 * and nothing has been added to the layout_hdr cache.
 *
 * On success (return 0) fl->dsaddr holds the referenced device address.
 * Every failure returns -EINVAL; failures detected after the device has
 * been looked up put the device reference again before returning.
 */
static int
filelayout_check_layout(struct pnfs_layout_hdr *lo,
			struct nfs4_filelayout_segment *fl,
			struct nfs4_layoutget_res *lgr,
			struct nfs4_deviceid *id,
			gfp_t gfp_flags)
{
	struct nfs_server *nfss = NFS_SERVER(lo->plh_inode);
	struct nfs4_file_layout_dsaddr *dsaddr;
	struct nfs4_deviceid_node *node;
	int status = -EINVAL;

	dprintk(" --> %s \n ", __func__);

	/* FIXME: remove this check when layout segment support is added */
	if (lgr->range.offset != 0 ||
	    lgr->range.length != NFS4_MAX_UINT64) {
		dprintk(" %s Only whole file layouts supported. Use MDS i/o \n ",
			__func__);
		goto out;
	}

	if (fl->pattern_offset > lgr->range.offset) {
		dprintk(" %s pattern_offset %lld too large \n ",
			__func__, fl->pattern_offset);
		goto out;
	}

	/* The stripe unit must be a non-zero multiple of the page size. */
	if (!fl->stripe_unit || fl->stripe_unit % PAGE_SIZE) {
		dprintk(" %s Invalid stripe unit (%u) \n ",
			__func__, fl->stripe_unit);
		goto out;
	}

	/* find and reference the deviceid */
	node = nfs4_find_get_deviceid(nfss->pnfs_curr_ld, nfss->nfs_client, id);
	if (node != NULL) {
		dsaddr = container_of(node, struct nfs4_file_layout_dsaddr,
				      id_node);
	} else {
		/* Not found by the lookup: fetch the device information. */
		dsaddr = get_device_info(lo->plh_inode, id, gfp_flags);
		if (dsaddr == NULL)
			goto out;
	}

	/* Found deviceid is being reaped */
	if (test_bit(NFS_DEVICEID_INVALID, &dsaddr->id_node.flags))
		goto out_put;

	fl->dsaddr = dsaddr;

	if (fl->first_stripe_index >= dsaddr->stripe_count) {
		dprintk(" %s Bad first_stripe_index %u \n ",
			__func__, fl->first_stripe_index);
		goto out_put;
	}

	/*
	 * Sparse packing accepts zero or one file handle, or one per data
	 * server; dense packing needs exactly one per stripe.
	 */
	if ((fl->stripe_type == STRIPE_SPARSE &&
	     fl->num_fh > 1 && fl->num_fh != dsaddr->ds_num) ||
	    (fl->stripe_type == STRIPE_DENSE &&
	     fl->num_fh != dsaddr->stripe_count)) {
		dprintk(" %s num_fh %u not valid for given packing \n ",
			__func__, fl->num_fh);
		goto out_put;
	}

	/* Misalignment with rsize/wsize is only reported, not rejected. */
	if (fl->stripe_unit % nfss->rsize || fl->stripe_unit % nfss->wsize) {
		dprintk(" %s Stripe unit (%u) not aligned with rsize %u "
			" wsize %u \n ", __func__, fl->stripe_unit, nfss->rsize,
			nfss->wsize);
	}

	status = 0;
out:
	dprintk(" --> %s returns %d \n ", __func__, status);
	return status;
out_put:
	nfs4_fl_put_deviceid(dsaddr);
	goto out;
}
static void filelayout_free_fh_array ( struct nfs4_filelayout_segment * fl )
{
int i ;
for ( i = 0 ; i < fl - > num_fh ; i + + ) {
if ( ! fl - > fh_array [ i ] )
break ;
kfree ( fl - > fh_array [ i ] ) ;
}
kfree ( fl - > fh_array ) ;
fl - > fh_array = NULL ;
}
/*
 * Release a file layout segment: first its file handle array, then the
 * segment structure itself.
 */
static void
_filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
{
	/* The array hangs off fl, so it has to go before fl does. */
	filelayout_free_fh_array(fl);

	kfree(fl);
}
/*
 * filelayout_decode_layout()
 *
 * Decode the XDR-encoded layout body in @lgr into @fl and copy the device
 * id it names into @id.
 *
 * @flo:       layout header (not used by the decoder)
 * @fl:        segment to fill in: stripe parameters and file handle array
 * @lgr:       LAYOUTGET result carrying the encoded layout pages
 * @id:        receives the device id found at the start of the layout
 * @gfp_flags: allocation flags for the scratch page and the file handles
 *
 * Returns 0 on success, -ENOMEM when the scratch page cannot be allocated
 * and -EIO for every other failure.  On failure no file handle array is
 * left attached to @fl and fl->num_fh is reset to 0.
 */
static int
filelayout_decode_layout(struct pnfs_layout_hdr *flo,
			 struct nfs4_filelayout_segment *fl,
			 struct nfs4_layoutget_res *lgr,
			 struct nfs4_deviceid *id,
			 gfp_t gfp_flags)
{
	struct xdr_stream stream;
	struct xdr_buf buf;
	struct page *scratch;
	__be32 *p;
	uint32_t nfl_util;
	int i;

	dprintk(" %s: set_layout_map Begin \n ", __func__);

	scratch = alloc_page(gfp_flags);
	if (!scratch)
		return -ENOMEM;

	xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages,
			      lgr->layoutp->len);
	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);

	/* 20 = nfl_util (4), first_stripe_index (4), pattern_offset (8),
	 * num_fh (4) */
	p = xdr_inline_decode(&stream, NFS4_DEVICEID4_SIZE + 20);
	if (unlikely(!p))
		goto out_err;

	memcpy(id, p, sizeof(*id));
	p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
	nfs4_print_deviceid(id);

	/* nfl_util carries the flag bits and the stripe unit in one word. */
	nfl_util = be32_to_cpup(p++);
	if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS)
		fl->commit_through_mds = 1;
	if (nfl_util & NFL4_UFLG_DENSE)
		fl->stripe_type = STRIPE_DENSE;
	else
		fl->stripe_type = STRIPE_SPARSE;
	fl->stripe_unit = nfl_util & ~NFL4_UFLG_MASK;

	fl->first_stripe_index = be32_to_cpup(p++);
	p = xdr_decode_hyper(p, &fl->pattern_offset);
	fl->num_fh = be32_to_cpup(p++);

	dprintk(" %s: nfl_util 0x%X num_fh %u fsi %u po %llu \n ",
		__func__, nfl_util, fl->num_fh, fl->first_stripe_index,
		fl->pattern_offset);

	/* Note that a zero value for num_fh is legal for STRIPE_SPARSE.
	 * Further checking is done in filelayout_check_layout */
	if (fl->num_fh >
	    max(NFS4_PNFS_MAX_STRIPE_CNT, NFS4_PNFS_MAX_MULTI_CNT))
		goto out_err;

	if (fl->num_fh > 0) {
		/* kcalloc zeroes the array and checks the multiplication. */
		fl->fh_array = kcalloc(fl->num_fh, sizeof(struct nfs_fh *),
				       gfp_flags);
		if (!fl->fh_array)
			goto out_err;
	}

	for (i = 0; i < fl->num_fh; i++) {
		/* Do we want to use a mempool here? */
		fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), gfp_flags);
		if (!fl->fh_array[i])
			goto out_err_free;

		p = xdr_inline_decode(&stream, 4);
		if (unlikely(!p))
			goto out_err_free;
		fl->fh_array[i]->size = be32_to_cpup(p++);
		/*
		 * The handle bytes are copied into ->data below, so the limit
		 * is the size of that buffer.  sizeof(struct nfs_fh) also
		 * counts the other members and would let the memcpy run past
		 * the end of the allocation.
		 */
		if (fl->fh_array[i]->size > sizeof(fl->fh_array[i]->data)) {
			printk(KERN_ERR " NFS: Too big fh %d received %d \n ",
			       i, fl->fh_array[i]->size);
			goto out_err_free;
		}

		p = xdr_inline_decode(&stream, fl->fh_array[i]->size);
		if (unlikely(!p))
			goto out_err_free;
		memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size);
		dprintk(" DEBUG: %s: fh len %d \n ", __func__,
			fl->fh_array[i]->size);
	}

	__free_page(scratch);
	return 0;

out_err_free:
	filelayout_free_fh_array(fl);
out_err:
	/*
	 * The caller frees @fl through _filelayout_free_lseg(), which walks
	 * num_fh entries of fh_array.  No array is attached any more here,
	 * so make the count agree with that.
	 */
	fl->num_fh = 0;
	__free_page(scratch);
	return -EIO;
}
2011-03-23 16:27:49 +03:00
static void
filelayout_free_lseg ( struct pnfs_layout_segment * lseg )
{
struct nfs4_filelayout_segment * fl = FILELAYOUT_LSEG ( lseg ) ;
dprintk ( " --> %s \n " , __func__ ) ;
nfs4_fl_put_deviceid ( fl - > dsaddr ) ;
2012-04-20 22:47:38 +04:00
/* This assumes a single RW lseg */
if ( lseg - > pls_range . iomode = = IOMODE_RW ) {
struct nfs4_filelayout * flo ;
flo = FILELAYOUT_FROM_HDR ( lseg - > pls_layout ) ;
flo - > commit_info . nbuckets = 0 ;
kfree ( flo - > commit_info . buckets ) ;
flo - > commit_info . buckets = NULL ;
}
2011-03-23 16:27:49 +03:00
_filelayout_free_lseg ( fl ) ;
}
2012-04-20 22:47:38 +04:00
static int
filelayout_alloc_commit_info ( struct pnfs_layout_segment * lseg ,
2012-04-20 22:47:53 +04:00
struct nfs_commit_info * cinfo ,
2012-04-20 22:47:38 +04:00
gfp_t gfp_flags )
{
struct nfs4_filelayout_segment * fl = FILELAYOUT_LSEG ( lseg ) ;
2012-04-20 22:47:53 +04:00
struct pnfs_commit_bucket * buckets ;
2012-04-20 22:47:38 +04:00
int size ;
if ( fl - > commit_through_mds )
return 0 ;
2012-04-20 22:47:53 +04:00
if ( cinfo - > ds - > nbuckets ! = 0 ) {
2012-04-20 22:47:38 +04:00
/* This assumes there is only one IOMODE_RW lseg. What
* we really want to do is have a layout_hdr level
* dictionary of < multipath_list4 , fh > keys , each
* associated with a struct list_head , populated by calls
* to filelayout_write_pagelist ( ) .
* */
return 0 ;
}
size = ( fl - > stripe_type = = STRIPE_SPARSE ) ?
fl - > dsaddr - > ds_num : fl - > dsaddr - > stripe_count ;
2012-04-20 22:47:53 +04:00
buckets = kcalloc ( size , sizeof ( struct pnfs_commit_bucket ) ,
2012-04-20 22:47:38 +04:00
gfp_flags ) ;
if ( ! buckets )
return - ENOMEM ;
else {
int i ;
2012-04-20 22:47:53 +04:00
spin_lock ( cinfo - > lock ) ;
if ( cinfo - > ds - > nbuckets ! = 0 )
2012-04-20 22:47:38 +04:00
kfree ( buckets ) ;
else {
2012-04-20 22:47:53 +04:00
cinfo - > ds - > buckets = buckets ;
cinfo - > ds - > nbuckets = size ;
2012-04-20 22:47:38 +04:00
for ( i = 0 ; i < size ; i + + ) {
INIT_LIST_HEAD ( & buckets [ i ] . written ) ;
INIT_LIST_HEAD ( & buckets [ i ] . committing ) ;
}
}
2012-04-20 22:47:53 +04:00
spin_unlock ( cinfo - > lock ) ;
2012-04-20 22:47:38 +04:00
return 0 ;
}
}
2010-10-20 08:18:04 +04:00
/*
 * Build a file layout segment from a LAYOUTGET result.
 *
 * Allocates the segment, decodes the layout into it and validates the
 * result.  Returns the embedded generic segment header on success, or NULL
 * when allocation, decoding or validation fails (the partially built
 * segment is freed in that case).
 */
static struct pnfs_layout_segment *
filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
		      struct nfs4_layoutget_res *lgr,
		      gfp_t gfp_flags)
{
	struct nfs4_filelayout_segment *fl;
	struct nfs4_deviceid id;
	int rc;

	dprintk(" --> %s \n ", __func__);

	fl = kzalloc(sizeof(*fl), gfp_flags);
	if (fl == NULL)
		return NULL;

	/* Validation only runs when decoding succeeded. */
	rc = filelayout_decode_layout(layoutid, fl, lgr, &id, gfp_flags);
	if (rc == 0)
		rc = filelayout_check_layout(layoutid, fl, lgr, &id,
					     gfp_flags);
	if (rc != 0) {
		_filelayout_free_lseg(fl);
		return NULL;
	}

	return &fl->generic_hdr;
}
2011-03-01 04:34:14 +03:00
/*
* filelayout_pg_test ( ) . Called by nfs_can_coalesce_requests ( )
*
2011-05-25 22:03:56 +04:00
* return true : coalesce page
* return false : don ' t coalesce page
2011-03-01 04:34:14 +03:00
*/
2011-06-10 21:30:23 +04:00
static bool
2011-03-01 04:34:14 +03:00
filelayout_pg_test ( struct nfs_pageio_descriptor * pgio , struct nfs_page * prev ,
struct nfs_page * req )
{
u64 p_stripe , r_stripe ;
u32 stripe_unit ;
2011-06-20 02:33:46 +04:00
if ( ! pnfs_generic_pg_test ( pgio , prev , req ) | |
! nfs_generic_pg_test ( pgio , prev , req ) )
return false ;
2011-05-25 21:54:40 +04:00
2012-04-20 22:47:43 +04:00
p_stripe = ( u64 ) req_offset ( prev ) ;
r_stripe = ( u64 ) req_offset ( req ) ;
2011-03-01 04:34:14 +03:00
stripe_unit = FILELAYOUT_LSEG ( pgio - > pg_lseg ) - > stripe_unit ;
do_div ( p_stripe , stripe_unit ) ;
do_div ( r_stripe , stripe_unit ) ;
return ( p_stripe = = r_stripe ) ;
}
2012-03-11 21:11:00 +04:00
static void
2011-06-14 02:22:38 +04:00
filelayout_pg_init_read ( struct nfs_pageio_descriptor * pgio ,
struct nfs_page * req )
{
BUG_ON ( pgio - > pg_lseg ! = NULL ) ;
2012-04-21 03:55:31 +04:00
if ( req - > wb_offset ! = req - > wb_pgbase ) {
/*
* Handling unaligned pages is difficult , because have to
* somehow split a req in two in certain cases in the
* pg . test code . Avoid this by just not using pnfs
* in this case .
*/
nfs_pageio_reset_read_mds ( pgio ) ;
return ;
}
2011-06-14 02:22:38 +04:00
pgio - > pg_lseg = pnfs_update_layout ( pgio - > pg_inode ,
req - > wb_context ,
0 ,
NFS4_MAX_UINT64 ,
IOMODE_READ ,
GFP_KERNEL ) ;
/* If no lseg, fall back to read through mds */
if ( pgio - > pg_lseg = = NULL )
2011-07-13 23:59:57 +04:00
nfs_pageio_reset_read_mds ( pgio ) ;
2011-06-14 02:22:38 +04:00
}
2012-03-11 21:11:00 +04:00
static void
2011-06-14 02:22:38 +04:00
filelayout_pg_init_write ( struct nfs_pageio_descriptor * pgio ,
struct nfs_page * req )
{
2012-04-20 22:47:53 +04:00
struct nfs_commit_info cinfo ;
2012-04-20 22:47:38 +04:00
int status ;
2011-06-14 02:22:38 +04:00
BUG_ON ( pgio - > pg_lseg ! = NULL ) ;
2012-04-21 03:55:31 +04:00
if ( req - > wb_offset ! = req - > wb_pgbase )
goto out_mds ;
2011-06-14 02:22:38 +04:00
pgio - > pg_lseg = pnfs_update_layout ( pgio - > pg_inode ,
req - > wb_context ,
0 ,
NFS4_MAX_UINT64 ,
IOMODE_RW ,
GFP_NOFS ) ;
/* If no lseg, fall back to write through mds */
if ( pgio - > pg_lseg = = NULL )
2012-04-20 22:47:38 +04:00
goto out_mds ;
2012-04-20 22:47:53 +04:00
nfs_init_cinfo ( & cinfo , pgio - > pg_inode , pgio - > pg_dreq ) ;
status = filelayout_alloc_commit_info ( pgio - > pg_lseg , & cinfo , GFP_NOFS ) ;
2012-04-20 22:47:38 +04:00
if ( status < 0 ) {
2012-09-19 04:57:08 +04:00
pnfs_put_lseg ( pgio - > pg_lseg ) ;
2012-04-20 22:47:38 +04:00
pgio - > pg_lseg = NULL ;
goto out_mds ;
}
return ;
out_mds :
nfs_pageio_reset_write_mds ( pgio ) ;
2011-06-14 02:22:38 +04:00
}
2011-06-10 21:30:23 +04:00
static const struct nfs_pageio_ops filelayout_pg_read_ops = {
2011-06-14 02:22:38 +04:00
. pg_init = filelayout_pg_init_read ,
2011-06-10 21:30:23 +04:00
. pg_test = filelayout_pg_test ,
2011-07-13 23:58:28 +04:00
. pg_doio = pnfs_generic_pg_readpages ,
2011-06-10 21:30:23 +04:00
} ;
static const struct nfs_pageio_ops filelayout_pg_write_ops = {
2011-06-14 02:22:38 +04:00
. pg_init = filelayout_pg_init_write ,
2011-06-10 21:30:23 +04:00
. pg_test = filelayout_pg_test ,
2011-07-13 23:59:19 +04:00
. pg_doio = pnfs_generic_pg_writepages ,
2011-06-10 21:30:23 +04:00
} ;
2011-03-23 16:27:53 +03:00
/*
 * Map stripe index @j to a commit bucket index: for STRIPE_SPARSE segments
 * the value of nfs4_fl_calc_ds_index() is used, otherwise @j itself.
 */
static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
{
	return (fl->stripe_type == STRIPE_SPARSE) ?
		nfs4_fl_calc_ds_index(&fl->generic_hdr, j) : j;
}
2012-03-09 02:29:35 +04:00
/* The generic layer is about to remove the req from the commit list.
* If this will make the bucket empty , it will need to put the lseg reference .
*/
2012-03-16 01:16:40 +04:00
static void
2012-04-20 22:47:53 +04:00
filelayout_clear_request_commit ( struct nfs_page * req ,
struct nfs_commit_info * cinfo )
2012-03-09 02:29:35 +04:00
{
2012-03-16 01:16:40 +04:00
struct pnfs_layout_segment * freeme = NULL ;
2012-04-20 22:47:53 +04:00
spin_lock ( cinfo - > lock ) ;
2012-03-16 01:16:40 +04:00
if ( ! test_and_clear_bit ( PG_COMMIT_TO_DS , & req - > wb_flags ) )
goto out ;
2012-04-20 22:47:53 +04:00
cinfo - > ds - > nwritten - - ;
2012-03-09 02:29:35 +04:00
if ( list_is_singular ( & req - > wb_list ) ) {
2012-04-20 22:47:53 +04:00
struct pnfs_commit_bucket * bucket ;
2012-03-09 02:29:35 +04:00
2012-04-20 22:47:38 +04:00
bucket = list_first_entry ( & req - > wb_list ,
2012-04-20 22:47:53 +04:00
struct pnfs_commit_bucket ,
2012-04-20 22:47:38 +04:00
written ) ;
freeme = bucket - > wlseg ;
bucket - > wlseg = NULL ;
2012-03-09 02:29:35 +04:00
}
2012-03-16 01:16:40 +04:00
out :
2012-04-20 22:47:53 +04:00
nfs_request_remove_commit_list ( req , cinfo ) ;
spin_unlock ( cinfo - > lock ) ;
2012-09-19 04:57:08 +04:00
pnfs_put_lseg ( freeme ) ;
2012-03-09 02:29:35 +04:00
}
static struct list_head *
filelayout_choose_commit_list ( struct nfs_page * req ,
2012-04-20 22:47:53 +04:00
struct pnfs_layout_segment * lseg ,
struct nfs_commit_info * cinfo )
2011-03-23 16:27:53 +03:00
{
struct nfs4_filelayout_segment * fl = FILELAYOUT_LSEG ( lseg ) ;
u32 i , j ;
struct list_head * list ;
2012-04-20 22:47:53 +04:00
struct pnfs_commit_bucket * buckets ;
2011-03-23 16:27:53 +03:00
2012-03-09 02:29:35 +04:00
if ( fl - > commit_through_mds )
2012-04-20 22:47:53 +04:00
return & cinfo - > mds - > list ;
2012-03-09 02:29:35 +04:00
2011-03-23 16:27:53 +03:00
/* Note that we are calling nfs4_fl_calc_j_index on each page
* that ends up being committed to a data server . An attractive
* alternative is to add a field to nfs_write_data and nfs_page
* to store the value calculated in filelayout_write_pagelist
* and just use that here .
*/
2012-04-20 22:47:43 +04:00
j = nfs4_fl_calc_j_index ( lseg , req_offset ( req ) ) ;
2011-03-23 16:27:53 +03:00
i = select_bucket_index ( fl , j ) ;
2012-04-20 22:47:53 +04:00
buckets = cinfo - > ds - > buckets ;
2012-04-20 22:47:38 +04:00
list = & buckets [ i ] . written ;
2011-03-23 16:27:53 +03:00
if ( list_empty ( list ) ) {
2012-03-09 02:29:35 +04:00
/* Non-empty buckets hold a reference on the lseg. That ref
* is normally transferred to the COMMIT call and released
* there . It could also be released if the last req is pulled
* off due to a rewrite , in which case it will be done in
2012-04-20 22:47:38 +04:00
* filelayout_clear_request_commit
2012-03-09 02:29:35 +04:00
*/
2012-09-19 04:57:08 +04:00
buckets [ i ] . wlseg = pnfs_get_lseg ( lseg ) ;
2011-03-23 16:27:53 +03:00
}
2012-03-16 01:16:40 +04:00
set_bit ( PG_COMMIT_TO_DS , & req - > wb_flags ) ;
2012-04-20 22:47:53 +04:00
cinfo - > ds - > nwritten + + ;
2011-03-23 16:27:53 +03:00
return list ;
}
2012-03-16 01:16:40 +04:00
/*
 * Queue @req for commit on the list selected by
 * filelayout_choose_commit_list().
 */
static void
filelayout_mark_request_commit(struct nfs_page *req,
			       struct pnfs_layout_segment *lseg,
			       struct nfs_commit_info *cinfo)
{
	struct list_head *target = filelayout_choose_commit_list(req, lseg,
								 cinfo);

	nfs_request_add_commit_list(req, target, cinfo);
}
2011-03-23 16:27:53 +03:00
/*
 * Translate commit bucket index @i into a data server index: @i is used
 * directly for STRIPE_SPARSE segments, otherwise it is passed through
 * nfs4_fl_calc_ds_index().
 */
static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
{
	struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);

	return (flseg->stripe_type == STRIPE_SPARSE) ?
		i : nfs4_fl_calc_ds_index(lseg, i);
}
/*
 * Select the file handle to use for a commit to bucket index @i.
 *
 * Non-sparse segments always use fh_array[i].  STRIPE_SPARSE segments use
 * fh_array[0] when there is exactly one handle, return NULL when there are
 * none, and use fh_array[i] otherwise.
 */
static struct nfs_fh *
select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i)
{
	struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);

	if (flseg->stripe_type != STRIPE_SPARSE)
		return flseg->fh_array[i];

	switch (flseg->num_fh) {
	case 0:
		/* Use the MDS OPEN fh set in nfs_read_rpcsetup */
		return NULL;
	case 1:
		return flseg->fh_array[0];
	default:
		return flseg->fh_array[i];
	}
}
2012-04-20 22:47:39 +04:00
static int filelayout_initiate_commit ( struct nfs_commit_data * data , int how )
2011-03-23 16:27:53 +03:00
{
struct pnfs_layout_segment * lseg = data - > lseg ;
struct nfs4_pnfs_ds * ds ;
u32 idx ;
struct nfs_fh * fh ;
idx = calc_ds_index_from_commit ( lseg , data - > ds_commit_index ) ;
ds = nfs4_fl_prepare_ds ( lseg , idx ) ;
if ( ! ds ) {
prepare_to_resend_writes ( data ) ;
2012-03-16 01:16:40 +04:00
filelayout_commit_release ( data ) ;
2011-03-23 16:27:53 +03:00
return - EAGAIN ;
}
2012-04-28 01:53:51 +04:00
dprintk ( " %s ino %lu, how %d cl_count %d \n " , __func__ ,
data - > inode - > i_ino , how , atomic_read ( & ds - > ds_clp - > cl_count ) ) ;
2012-04-20 22:47:39 +04:00
data - > commit_done_cb = filelayout_commit_done_cb ;
2012-04-28 01:53:51 +04:00
atomic_inc ( & ds - > ds_clp - > cl_count ) ;
2011-03-23 16:27:53 +03:00
data - > ds_clp = ds - > ds_clp ;
fh = select_ds_fh_from_commit ( lseg , data - > ds_commit_index ) ;
if ( fh )
data - > args . fh = fh ;
2012-04-20 22:47:39 +04:00
return nfs_initiate_commit ( ds - > ds_clp - > cl_rpcclient , data ,
2012-04-28 01:53:44 +04:00
& filelayout_commit_call_ops , how ,
RPC_TASK_SOFTCONN ) ;
2011-03-23 16:27:53 +03:00
}
2012-03-16 01:16:40 +04:00
static int
2012-04-20 22:47:57 +04:00
transfer_commit_list ( struct list_head * src , struct list_head * dst ,
struct nfs_commit_info * cinfo , int max )
2012-03-16 01:16:40 +04:00
{
struct nfs_page * req , * tmp ;
int ret = 0 ;
list_for_each_entry_safe ( req , tmp , src , wb_list ) {
if ( ! nfs_lock_request ( req ) )
continue ;
2012-05-23 00:36:27 +04:00
kref_get ( & req - > wb_kref ) ;
2012-04-20 22:47:53 +04:00
if ( cond_resched_lock ( cinfo - > lock ) )
2012-03-17 19:59:30 +04:00
list_safe_reset_next ( req , tmp , wb_list ) ;
2012-04-20 22:47:53 +04:00
nfs_request_remove_commit_list ( req , cinfo ) ;
2012-03-16 01:16:40 +04:00
clear_bit ( PG_COMMIT_TO_DS , & req - > wb_flags ) ;
nfs_list_add_request ( req , dst ) ;
ret + + ;
2012-04-20 22:47:57 +04:00
if ( ( ret = = max ) & & ! cinfo - > dreq )
2012-03-16 01:16:40 +04:00
break ;
}
2012-04-20 22:47:57 +04:00
return ret ;
}
static int
filelayout_scan_ds_commit_list ( struct pnfs_commit_bucket * bucket ,
struct nfs_commit_info * cinfo ,
int max )
{
struct list_head * src = & bucket - > written ;
struct list_head * dst = & bucket - > committing ;
int ret ;
ret = transfer_commit_list ( src , dst , cinfo , max ) ;
2012-04-20 22:47:38 +04:00
if ( ret ) {
2012-04-20 22:47:53 +04:00
cinfo - > ds - > nwritten - = ret ;
cinfo - > ds - > ncommitting + = ret ;
2012-04-20 22:47:38 +04:00
bucket - > clseg = bucket - > wlseg ;
if ( list_empty ( src ) )
bucket - > wlseg = NULL ;
else
2012-09-19 04:57:08 +04:00
pnfs_get_lseg ( bucket - > clseg ) ;
2012-04-20 22:47:38 +04:00
}
2012-03-16 01:16:40 +04:00
return ret ;
}
2012-03-09 02:29:35 +04:00
/* Move reqs from written to committing lists, returning count of number moved.
2012-04-20 22:47:53 +04:00
* Note called with cinfo - > lock held .
2012-03-09 02:29:35 +04:00
*/
2012-04-20 22:47:53 +04:00
static int filelayout_scan_commit_lists ( struct nfs_commit_info * cinfo ,
int max )
2012-03-09 02:29:35 +04:00
{
int i , rv = 0 , cnt ;
2012-04-20 22:47:53 +04:00
for ( i = 0 ; i < cinfo - > ds - > nbuckets & & max ! = 0 ; i + + ) {
cnt = filelayout_scan_ds_commit_list ( & cinfo - > ds - > buckets [ i ] ,
cinfo , max ) ;
2012-03-09 02:29:35 +04:00
max - = cnt ;
rv + = cnt ;
}
return rv ;
}
2012-04-20 22:47:57 +04:00
/* Pull everything off the committing lists and dump into @dst */
static void filelayout_recover_commit_reqs ( struct list_head * dst ,
struct nfs_commit_info * cinfo )
{
struct pnfs_commit_bucket * b ;
int i ;
/* NOTE cinfo->lock is NOT held, relying on fact that this is
* only called on single thread per dreq .
2012-09-19 04:57:08 +04:00
* Can ' t take the lock because need to do pnfs_put_lseg
2012-04-20 22:47:57 +04:00
*/
for ( i = 0 , b = cinfo - > ds - > buckets ; i < cinfo - > ds - > nbuckets ; i + + , b + + ) {
if ( transfer_commit_list ( & b - > written , dst , cinfo , 0 ) ) {
BUG_ON ( ! list_empty ( & b - > written ) ) ;
2012-09-19 04:57:08 +04:00
pnfs_put_lseg ( b - > wlseg ) ;
2012-04-20 22:47:57 +04:00
b - > wlseg = NULL ;
}
}
cinfo - > ds - > nwritten = 0 ;
}
2012-03-16 21:52:45 +04:00
static unsigned int
2012-04-20 22:47:53 +04:00
alloc_ds_commits ( struct nfs_commit_info * cinfo , struct list_head * list )
2011-03-23 16:27:53 +03:00
{
2012-04-20 22:47:53 +04:00
struct pnfs_ds_commit_info * fl_cinfo ;
struct pnfs_commit_bucket * bucket ;
2012-04-20 22:47:39 +04:00
struct nfs_commit_data * data ;
2011-03-23 16:27:53 +03:00
int i , j ;
2012-03-16 21:52:45 +04:00
unsigned int nreq = 0 ;
2011-03-23 16:27:53 +03:00
2012-04-20 22:47:53 +04:00
fl_cinfo = cinfo - > ds ;
2012-04-20 22:47:38 +04:00
bucket = fl_cinfo - > buckets ;
for ( i = 0 ; i < fl_cinfo - > nbuckets ; i + + , bucket + + ) {
if ( list_empty ( & bucket - > committing ) )
2011-03-23 16:27:53 +03:00
continue ;
data = nfs_commitdata_alloc ( ) ;
if ( ! data )
2012-03-16 21:52:45 +04:00
break ;
2011-03-23 16:27:53 +03:00
data - > ds_commit_index = i ;
2012-04-20 22:47:38 +04:00
data - > lseg = bucket - > clseg ;
bucket - > clseg = NULL ;
2011-03-23 16:27:53 +03:00
list_add ( & data - > pages , list ) ;
2012-03-16 21:52:45 +04:00
nreq + + ;
2011-03-23 16:27:53 +03:00
}
2012-03-16 21:52:45 +04:00
/* Clean up on error */
2012-04-20 22:47:38 +04:00
for ( j = i ; j < fl_cinfo - > nbuckets ; j + + , bucket + + ) {
if ( list_empty ( & bucket - > committing ) )
2011-03-23 16:27:53 +03:00
continue ;
2012-04-20 22:47:53 +04:00
nfs_retry_commit ( & bucket - > committing , bucket - > clseg , cinfo ) ;
2012-09-19 04:57:08 +04:00
pnfs_put_lseg ( bucket - > clseg ) ;
2012-04-20 22:47:38 +04:00
bucket - > clseg = NULL ;
2011-03-23 16:27:53 +03:00
}
/* Caller will clean up entries put on list */
2012-03-16 21:52:45 +04:00
return nreq ;
2011-03-23 16:27:53 +03:00
}
/* This follows nfs_commit_list pretty closely */
static int
filelayout_commit_pagelist ( struct inode * inode , struct list_head * mds_pages ,
2012-04-20 22:47:53 +04:00
int how , struct nfs_commit_info * cinfo )
2011-03-23 16:27:53 +03:00
{
2012-04-20 22:47:39 +04:00
struct nfs_commit_data * data , * tmp ;
2011-03-23 16:27:53 +03:00
LIST_HEAD ( list ) ;
2012-03-16 21:52:45 +04:00
unsigned int nreq = 0 ;
2011-03-23 16:27:53 +03:00
if ( ! list_empty ( mds_pages ) ) {
data = nfs_commitdata_alloc ( ) ;
2012-03-16 21:52:45 +04:00
if ( data ! = NULL ) {
data - > lseg = NULL ;
list_add ( & data - > pages , & list ) ;
nreq + + ;
} else
2012-04-20 22:47:53 +04:00
nfs_retry_commit ( mds_pages , NULL , cinfo ) ;
2011-03-23 16:27:53 +03:00
}
2012-04-20 22:47:53 +04:00
nreq + = alloc_ds_commits ( cinfo , & list ) ;
2012-03-16 21:52:45 +04:00
if ( nreq = = 0 ) {
2012-04-20 22:47:54 +04:00
cinfo - > completion_ops - > error_cleanup ( NFS_I ( inode ) ) ;
2012-03-16 21:52:45 +04:00
goto out ;
}
2012-04-20 22:47:53 +04:00
atomic_add ( nreq , & cinfo - > mds - > rpcs_out ) ;
2011-03-23 16:27:53 +03:00
list_for_each_entry_safe ( data , tmp , & list , pages ) {
list_del_init ( & data - > pages ) ;
if ( ! data - > lseg ) {
2012-04-20 22:47:54 +04:00
nfs_init_commit ( data , mds_pages , NULL , cinfo ) ;
2012-04-20 22:47:39 +04:00
nfs_initiate_commit ( NFS_CLIENT ( inode ) , data ,
2012-04-28 01:53:44 +04:00
data - > mds_ops , how , 0 ) ;
2011-03-23 16:27:53 +03:00
} else {
2012-04-20 22:47:53 +04:00
struct pnfs_commit_bucket * buckets ;
2012-04-20 22:47:38 +04:00
2012-04-20 22:47:53 +04:00
buckets = cinfo - > ds - > buckets ;
2012-04-20 22:47:54 +04:00
nfs_init_commit ( data , & buckets [ data - > ds_commit_index ] . committing , data - > lseg , cinfo ) ;
2011-03-23 16:27:53 +03:00
filelayout_initiate_commit ( data , how ) ;
}
}
2012-03-16 21:52:45 +04:00
out :
2012-04-20 22:47:53 +04:00
cinfo - > ds - > ncommitting = 0 ;
2012-03-16 21:52:45 +04:00
return PNFS_ATTEMPTED ;
2011-03-23 16:27:53 +03:00
}
2011-05-20 15:47:33 +04:00
/*
 * Free the file layout device address that embeds deviceid node @d as its
 * id_node member.
 */
static void
filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)
{
	struct nfs4_file_layout_dsaddr *dsaddr;

	dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
	nfs4_fl_free_deviceid(dsaddr);
}
2012-04-20 22:47:38 +04:00
/*
 * Allocate a zeroed file layout header for @inode.
 *
 * Returns the embedded generic layout header, or NULL when the allocation
 * fails.  The NULL check is explicit so the result does not depend on
 * generic_hdr being the first member of struct nfs4_filelayout.
 */
static struct pnfs_layout_hdr *
filelayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
{
	struct nfs4_filelayout *flo;

	flo = kzalloc(sizeof(*flo), gfp_flags);
	if (flo == NULL)
		return NULL;
	return &flo->generic_hdr;
}
/* Free the file layout header that contains generic header @lo. */
static void
filelayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct nfs4_filelayout *flo = FILELAYOUT_FROM_HDR(lo);

	kfree(flo);
}
2012-04-20 22:47:53 +04:00
static struct pnfs_ds_commit_info *
filelayout_get_ds_info ( struct inode * inode )
{
2012-04-24 22:50:34 +04:00
struct pnfs_layout_hdr * layout = NFS_I ( inode ) - > layout ;
if ( layout = = NULL )
return NULL ;
else
return & FILELAYOUT_FROM_HDR ( layout ) - > commit_info ;
2012-04-20 22:47:53 +04:00
}
2010-10-20 08:18:00 +04:00
static struct pnfs_layoutdriver_type filelayout_type = {
2011-03-01 04:34:21 +03:00
. id = LAYOUT_NFSV4_1_FILES ,
. name = " LAYOUT_NFSV4_1_FILES " ,
. owner = THIS_MODULE ,
2012-04-20 22:47:38 +04:00
. alloc_layout_hdr = filelayout_alloc_layout_hdr ,
. free_layout_hdr = filelayout_free_layout_hdr ,
2011-03-01 04:34:21 +03:00
. alloc_lseg = filelayout_alloc_lseg ,
. free_lseg = filelayout_free_lseg ,
2011-06-10 21:30:23 +04:00
. pg_read_ops = & filelayout_pg_read_ops ,
. pg_write_ops = & filelayout_pg_write_ops ,
2012-04-20 22:47:53 +04:00
. get_ds_info = & filelayout_get_ds_info ,
2012-03-16 01:16:40 +04:00
. mark_request_commit = filelayout_mark_request_commit ,
. clear_request_commit = filelayout_clear_request_commit ,
2012-03-09 02:29:35 +04:00
. scan_commit_lists = filelayout_scan_commit_lists ,
2012-04-20 22:47:57 +04:00
. recover_commit_reqs = filelayout_recover_commit_reqs ,
2011-03-23 16:27:53 +03:00
. commit_pagelist = filelayout_commit_pagelist ,
2011-03-01 04:34:19 +03:00
. read_pagelist = filelayout_read_pagelist ,
2011-03-03 18:13:45 +03:00
. write_pagelist = filelayout_write_pagelist ,
2011-05-20 15:47:33 +04:00
. free_deviceid_node = filelayout_free_deveiceid_node ,
2010-10-20 08:18:00 +04:00
} ;
/*
 * Module init: log a registration message and register the file layout
 * driver.  Returns the result of pnfs_register_layoutdriver().
 */
static int __init nfs4filelayout_init(void)
{
	int status;

	printk(KERN_INFO " %s: NFSv4 File Layout Driver Registering... \n ",
	       __func__);
	status = pnfs_register_layoutdriver(&filelayout_type);
	return status;
}
/*
 * Module exit: log an unregistration message and unregister the file
 * layout driver.
 */
static void __exit nfs4filelayout_exit(void)
{
	struct pnfs_layoutdriver_type *driver = &filelayout_type;

	printk(KERN_INFO " %s: NFSv4 File Layout Driver Unregistering... \n ",
	       __func__);
	pnfs_unregister_layoutdriver(driver);
}
2011-07-16 03:18:42 +04:00
/* Alias string declared for this module. */
MODULE_ALIAS ( " nfs-layouttype4-1 " ) ;
2010-10-20 08:18:00 +04:00
/* Hook nfs4filelayout_init/nfs4filelayout_exit up as the module's init and exit functions. */
module_init ( nfs4filelayout_init ) ;
module_exit ( nfs4filelayout_exit ) ;