2010-10-20 08:18:00 +04:00
/*
* Module for the pnfs nfs4 file layout driver .
* Defines all I / O and Policy interface operations , plus code
* to register itself with the pNFS client .
*
* Copyright ( c ) 2002
* The Regents of the University of Michigan
* All Rights Reserved
*
* Dean Hildebrand < dhildebz @ umich . edu >
*
* Permission is granted to use , copy , create derivative works , and
* redistribute this software and such derivative works for any purpose ,
* so long as the name of the University of Michigan is not used in
* any advertising or publicity pertaining to the use or distribution
* of this software without specific , written prior authorization . If
* the above copyright notice or any other identification of the
* University of Michigan is included in any copy of any portion of
* this software , then the disclaimer below must also be included .
*
* This software is provided as is , without representation or warranty
* of any kind either express or implied , including without limitation
* the implied warranties of merchantability , fitness for a particular
* purpose , or noninfringement . The Regents of the University of
* Michigan shall not be liable for any damages , including special ,
* indirect , incidental , or consequential damages , with respect to any
* claim arising out of or in connection with the use of the software ,
* even if it has been or is hereafter advised of the possibility of
* such damages .
*/
# include <linux/nfs_fs.h>
2011-06-20 02:33:46 +04:00
# include <linux/nfs_page.h>
2011-07-01 22:23:34 +04:00
# include <linux/module.h>
2010-10-20 08:18:04 +04:00
2012-02-17 22:15:24 +04:00
# include <linux/sunrpc/metrics.h>
2014-05-13 01:35:52 +04:00
# include "../nfs4session.h"
# include "../internal.h"
# include "../delegation.h"
# include "filelayout.h"
# include "../nfs4trace.h"
2010-10-20 08:18:00 +04:00
/* Debug facility selector for this file's dprintk() output. */
# define NFSDBG_FACILITY NFSDBG_PNFS_LD
/* Module metadata: license, author and one-line description. */
MODULE_LICENSE ( " GPL " ) ;
MODULE_AUTHOR ( " Dean Hildebrand <dhildebz@umich.edu> " ) ;
MODULE_DESCRIPTION ( " The NFSv4 file layout driver " ) ;
2011-03-01 04:34:20 +03:00
/*
 * Delay handed to rpc_delay() when a data server answers NFS4ERR_DELAY or
 * NFS4ERR_GRACE (see filelayout_async_handle_error).
 * NOTE(review): presumably 15 seconds expressed in jiffies - confirm.
 */
# define FILELAYOUT_POLL_RETRY_MAX (15*HZ)
2011-03-01 04:34:18 +03:00
/*
 * Map a file offset to a data server offset for a densely packed layout.
 *
 * After subtracting the segment's pattern_offset, the result is the number
 * of whole stripes (stripe_unit * stripe_count bytes each) that precede the
 * offset, multiplied by stripe_unit, plus the remainder of the offset
 * within its stripe unit.
 */
static loff_t
filelayout_get_dense_offset(struct nfs4_filelayout_segment *flseg,
			    loff_t offset)
{
	u32 width = flseg->stripe_unit * flseg->dsaddr->stripe_count;
	u32 within_unit;
	u64 whole_stripes;

	offset -= flseg->pattern_offset;

	whole_stripes = div_u64(offset, width);
	div_u64_rem(offset, flseg->stripe_unit, &within_unit);

	return whole_stripes * flseg->stripe_unit + within_unit;
}
/*
 * Compute the offset to use on the data server for a given file offset.
 *
 * STRIPE_SPARSE layouts use the file offset unchanged; STRIPE_DENSE layouts
 * are translated by filelayout_get_dense_offset().  Any other stripe type
 * hits BUG().
 */
static loff_t
filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
{
	struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);

	if (flseg->stripe_type == STRIPE_SPARSE)
		return offset;
	if (flseg->stripe_type == STRIPE_DENSE)
		return filelayout_get_dense_offset(flseg, offset);

	BUG();
}
2014-06-09 19:48:35 +04:00
static void filelayout_reset_write ( struct nfs_pgio_header * hdr )
2012-04-28 01:53:46 +04:00
{
2014-06-09 19:48:35 +04:00
struct rpc_task * task = & hdr - > task ;
2012-04-28 01:53:46 +04:00
if ( ! test_and_set_bit ( NFS_IOHDR_REDO , & hdr - > flags ) ) {
dprintk ( " %s Reset task %5u for i/o through MDS "
2013-12-17 21:20:16 +04:00
" (req %s/%llu, %u bytes @ offset %llu) \n " , __func__ ,
2014-06-09 19:48:35 +04:00
hdr - > task . tk_pid ,
2012-05-22 18:10:03 +04:00
hdr - > inode - > i_sb - > s_id ,
2013-12-17 21:20:16 +04:00
( unsigned long long ) NFS_FILEID ( hdr - > inode ) ,
2014-06-09 19:48:35 +04:00
hdr - > args . count ,
( unsigned long long ) hdr - > args . offset ) ;
2012-04-28 01:53:46 +04:00
2014-06-09 19:48:38 +04:00
task - > tk_status = pnfs_write_done_resend_to_mds ( hdr ) ;
2012-04-28 01:53:46 +04:00
}
}
2014-06-09 19:48:35 +04:00
static void filelayout_reset_read ( struct nfs_pgio_header * hdr )
2012-04-28 01:53:46 +04:00
{
2014-06-09 19:48:35 +04:00
struct rpc_task * task = & hdr - > task ;
2012-04-28 01:53:46 +04:00
if ( ! test_and_set_bit ( NFS_IOHDR_REDO , & hdr - > flags ) ) {
dprintk ( " %s Reset task %5u for i/o through MDS "
2013-12-17 21:20:16 +04:00
" (req %s/%llu, %u bytes @ offset %llu) \n " , __func__ ,
2014-06-09 19:48:35 +04:00
hdr - > task . tk_pid ,
2012-05-22 18:10:03 +04:00
hdr - > inode - > i_sb - > s_id ,
2013-12-17 21:20:16 +04:00
( unsigned long long ) NFS_FILEID ( hdr - > inode ) ,
2014-06-09 19:48:35 +04:00
hdr - > args . count ,
( unsigned long long ) hdr - > args . offset ) ;
2012-04-28 01:53:46 +04:00
2014-06-09 19:48:38 +04:00
task - > tk_status = pnfs_read_done_resend_to_mds ( hdr ) ;
2012-04-28 01:53:46 +04:00
}
}
2012-10-11 21:43:38 +04:00
static void filelayout_fenceme ( struct inode * inode , struct pnfs_layout_hdr * lo )
{
if ( ! test_and_clear_bit ( NFS_LAYOUT_RETURN , & lo - > plh_flags ) )
return ;
pnfs_return_layout ( inode ) ;
}
2011-03-01 04:34:20 +03:00
/*
 * Classify the status of a completed data-server RPC and start recovery.
 *
 * @task:  the RPC task whose tk_status is inspected
 * @state: open state used for stateid recovery; may be NULL (the commit
 *         path passes NULL)
 * @clp:   client of the data server the RPC was sent to
 * @lseg:  layout segment the I/O was issued under
 *
 * Returns:
 *   0                      tk_status was >= 0, or stateid recovery could
 *                          not be scheduled (tk_status is then set to -EIO)
 *   -EAGAIN                recovery was kicked off; tk_status is reset to 0
 *                          and the caller is expected to restart the call
 *   -NFS4ERR_RESET_TO_MDS  the I/O should be redone through the MDS;
 *                          tk_status is left untouched
 *
 * Note that clp->cl_session is dereferenced before the status is checked.
 */
static int filelayout_async_handle_error ( struct rpc_task * task ,
struct nfs4_state * state ,
struct nfs_client * clp ,
2012-04-28 01:53:46 +04:00
struct pnfs_layout_segment * lseg )
2011-03-01 04:34:20 +03:00
{
2012-10-11 21:43:38 +04:00
struct pnfs_layout_hdr * lo = lseg - > pls_layout ;
struct inode * inode = lo - > plh_inode ;
2012-04-28 01:53:46 +04:00
struct nfs_server * mds_server = NFS_SERVER ( inode ) ;
struct nfs4_deviceid_node * devid = FILELAYOUT_DEVID_NODE ( lseg ) ;
2012-03-07 19:49:41 +04:00
struct nfs_client * mds_client = mds_server - > nfs_client ;
2012-04-28 01:53:49 +04:00
struct nfs4_slot_table * tbl = & clp - > cl_session - > fc_slot_table ;
2012-03-07 19:49:41 +04:00
2011-03-01 04:34:20 +03:00
/* Nothing to recover from when the RPC did not fail. */
if ( task - > tk_status > = 0 )
return 0 ;
switch ( task - > tk_status ) {
2012-03-07 19:49:41 +04:00
/* MDS state errors */
case - NFS4ERR_DELEG_REVOKED :
case - NFS4ERR_ADMIN_REVOKED :
case - NFS4ERR_BAD_STATEID :
2012-04-28 01:53:46 +04:00
/* Without open state there is nothing to recover: just retry. */
if ( state = = NULL )
break ;
2012-03-08 20:03:53 +04:00
nfs_remove_bad_delegation ( state - > inode ) ;
2012-03-07 19:49:41 +04:00
/* no break: continues into the -NFS4ERR_OPENMODE handling below */
case - NFS4ERR_OPENMODE :
2012-04-28 01:53:46 +04:00
if ( state = = NULL )
break ;
2013-03-15 00:57:48 +04:00
if ( nfs4_schedule_stateid_recovery ( mds_server , state ) < 0 )
goto out_bad_stateid ;
2012-03-07 19:49:41 +04:00
goto wait_on_recovery ;
case - NFS4ERR_EXPIRED :
2013-03-15 00:57:48 +04:00
if ( state ! = NULL ) {
if ( nfs4_schedule_stateid_recovery ( mds_server , state ) < 0 )
goto out_bad_stateid ;
}
2012-03-07 19:49:41 +04:00
/* Lease recovery is scheduled on the MDS client even without state. */
nfs4_schedule_lease_recovery ( mds_client ) ;
goto wait_on_recovery ;
/* DS session errors */
2011-03-01 04:34:20 +03:00
case - NFS4ERR_BADSESSION :
case - NFS4ERR_BADSLOT :
case - NFS4ERR_BAD_HIGH_SLOT :
case - NFS4ERR_DEADSESSION :
case - NFS4ERR_CONN_NOT_BOUND_TO_SESSION :
case - NFS4ERR_SEQ_FALSE_RETRY :
case - NFS4ERR_SEQ_MISORDERED :
dprintk ( " %s ERROR %d, Reset session. Exchangeid "
" flags 0x%x \n " , __func__ , task - > tk_status ,
clp - > cl_exchange_flags ) ;
2012-05-27 21:02:53 +04:00
nfs4_schedule_session_recovery ( clp - > cl_session , task - > tk_status ) ;
2011-03-01 04:34:20 +03:00
break ;
case - NFS4ERR_DELAY :
case - NFS4ERR_GRACE :
/* Back off before the retry that follows at the out: label. */
rpc_delay ( task , FILELAYOUT_POLL_RETRY_MAX ) ;
break ;
2011-05-03 21:43:03 +04:00
case - NFS4ERR_RETRY_UNCACHED_REP :
break ;
2012-04-28 01:53:53 +04:00
/* Invalidate Layout errors */
case - NFS4ERR_PNFS_NO_LAYOUT :
case - ESTALE : /* mapped NFS4ERR_STALE */
case - EBADHANDLE : /* mapped NFS4ERR_BADHANDLE */
case - EISDIR : /* mapped NFS4ERR_ISDIR */
case - NFS4ERR_FHEXPIRED :
case - NFS4ERR_WRONG_TYPE :
dprintk ( " %s Invalid layout error %d \n " , __func__ ,
task - > tk_status ) ;
/*
* Destroy layout so new i / o will get a new layout .
* Layout will not be destroyed until all current lseg
* references are put . Mark layout as invalid to resend failed
* i / o and all i / o waiting on the slot table to the MDS until
* layout is destroyed and a new valid layout is obtained .
*/
2012-05-22 16:09:28 +04:00
pnfs_destroy_layout ( NFS_I ( inode ) ) ;
2012-04-28 01:53:53 +04:00
rpc_wake_up ( & tbl - > slot_tbl_waitq ) ;
goto reset ;
2012-04-28 01:53:46 +04:00
/* RPC connection errors */
case - ECONNREFUSED :
case - EHOSTDOWN :
case - EHOSTUNREACH :
case - ENETUNREACH :
case - EIO :
case - ETIMEDOUT :
case - EPIPE :
dprintk ( " %s DS connection error %d \n " , __func__ ,
task - > tk_status ) ;
2012-09-19 03:51:12 +04:00
/*
 * Mark the device unavailable and flag the layout for return; the
 * flag is consumed by filelayout_fenceme() from the release callbacks.
 */
nfs4_mark_deviceid_unavailable ( devid ) ;
2012-10-11 21:43:38 +04:00
set_bit ( NFS_LAYOUT_RETURN , & lo - > plh_flags ) ;
2012-04-28 01:53:49 +04:00
rpc_wake_up ( & tbl - > slot_tbl_waitq ) ;
2012-04-28 01:53:46 +04:00
/* fall through */
2011-03-01 04:34:20 +03:00
default :
2012-04-28 01:53:53 +04:00
reset :
2012-04-28 01:53:46 +04:00
dprintk ( " %s Retry through MDS. Error %d \n " , __func__ ,
2011-03-01 04:34:20 +03:00
task - > tk_status ) ;
2012-04-28 01:53:46 +04:00
return - NFS4ERR_RESET_TO_MDS ;
2011-03-01 04:34:20 +03:00
}
2012-03-07 19:49:41 +04:00
/* Retry path: clear the error and ask the caller to restart the call. */
out :
2011-03-01 04:34:20 +03:00
task - > tk_status = 0 ;
return - EAGAIN ;
2013-03-15 00:57:48 +04:00
/* Stateid recovery could not be scheduled: fail the I/O with -EIO. */
out_bad_stateid :
task - > tk_status = - EIO ;
return 0 ;
2012-03-07 19:49:41 +04:00
/*
 * Queue the task on the MDS client's wait queue; wake it again at once
 * if NFS4CLNT_MANAGER_RUNNING is not set in the client state.
 */
wait_on_recovery :
rpc_sleep_on ( & mds_client - > cl_rpcwaitq , task , NULL ) ;
if ( test_bit ( NFS4CLNT_MANAGER_RUNNING , & mds_client - > cl_state ) = = 0 )
rpc_wake_up_queued_task ( & mds_client - > cl_rpcwaitq , task ) ;
goto out ;
2011-03-01 04:34:20 +03:00
}
/* NFS_PROTO call done callback routines */
static int filelayout_read_done_cb ( struct rpc_task * task ,
2014-06-09 19:48:35 +04:00
struct nfs_pgio_header * hdr )
2011-03-01 04:34:20 +03:00
{
2012-04-28 01:53:46 +04:00
int err ;
2011-03-01 04:34:20 +03:00
2014-06-09 19:48:35 +04:00
trace_nfs4_pnfs_read ( hdr , task - > tk_status ) ;
err = filelayout_async_handle_error ( task , hdr - > args . context - > state ,
hdr - > ds_clp , hdr - > lseg ) ;
2011-03-01 04:34:20 +03:00
2012-04-28 01:53:46 +04:00
switch ( err ) {
case - NFS4ERR_RESET_TO_MDS :
2014-06-09 19:48:35 +04:00
filelayout_reset_read ( hdr ) ;
2012-04-28 01:53:46 +04:00
return task - > tk_status ;
case - EAGAIN :
2011-10-19 23:17:29 +04:00
rpc_restart_call_prepare ( task ) ;
2011-03-01 04:34:20 +03:00
return - EAGAIN ;
}
return 0 ;
}
2011-03-23 16:27:54 +03:00
/*
* We reference the rpc_cred of the first WRITE that triggers the need for
* a LAYOUTCOMMIT , and use it to send the layoutcommit compound .
* rfc5661 is not clear about which credential should be used .
*/
static void
2014-06-09 19:48:35 +04:00
filelayout_set_layoutcommit ( struct nfs_pgio_header * hdr )
2011-03-23 16:27:54 +03:00
{
2012-04-20 22:47:44 +04:00
if ( FILELAYOUT_LSEG ( hdr - > lseg ) - > commit_through_mds | |
2014-06-09 19:48:35 +04:00
hdr - > res . verf - > committed = = NFS_FILE_SYNC )
2011-03-23 16:27:54 +03:00
return ;
2014-06-09 19:48:35 +04:00
pnfs_set_layoutcommit ( hdr ) ;
2014-06-05 02:54:57 +04:00
dprintk ( " %s inode %lu pls_end_pos %lu \n " , __func__ , hdr - > inode - > i_ino ,
2012-04-20 22:47:44 +04:00
( unsigned long ) NFS_I ( hdr - > inode ) - > layout - > plh_lwb ) ;
2011-03-23 16:27:54 +03:00
}
2012-09-19 03:51:12 +04:00
bool
filelayout_test_devid_unavailable ( struct nfs4_deviceid_node * node )
{
return filelayout_test_devid_invalid ( node ) | |
nfs4_test_deviceid_unavailable ( node ) ;
}
/*
 * Decide whether I/O for @lseg has to go through the MDS, which is the case
 * when the segment's device id is unavailable.
 */
static bool
filelayout_reset_to_mds(struct pnfs_layout_segment *lseg)
{
	return filelayout_test_devid_unavailable(FILELAYOUT_DEVID_NODE(lseg));
}
2011-03-01 04:34:19 +03:00
/*
* Call ops for the async read / write cases
* In the case of dense layouts , the offset needs to be reset to its
* original value .
*/
static void filelayout_read_prepare ( struct rpc_task * task , void * data )
{
2014-06-09 19:48:35 +04:00
struct nfs_pgio_header * hdr = data ;
2011-03-01 04:34:19 +03:00
2014-06-09 19:48:35 +04:00
if ( unlikely ( test_bit ( NFS_CONTEXT_BAD , & hdr - > args . context - > flags ) ) ) {
2013-03-19 03:45:14 +04:00
rpc_exit ( task , - EIO ) ;
return ;
}
2014-06-09 19:48:35 +04:00
if ( filelayout_reset_to_mds ( hdr - > lseg ) ) {
2012-04-28 01:53:48 +04:00
dprintk ( " %s task %u reset io to MDS \n " , __func__ , task - > tk_pid ) ;
2014-06-09 19:48:35 +04:00
filelayout_reset_read ( hdr ) ;
2012-04-28 01:53:48 +04:00
rpc_exit ( task , 0 ) ;
return ;
}
2014-06-09 19:48:35 +04:00
hdr - > pgio_done_cb = filelayout_read_done_cb ;
2011-03-01 04:34:20 +03:00
2014-06-09 19:48:35 +04:00
if ( nfs41_setup_sequence ( hdr - > ds_clp - > cl_session ,
& hdr - > args . seq_args ,
& hdr - > res . seq_res ,
2013-03-17 23:52:00 +04:00
task ) )
return ;
2014-06-09 19:48:35 +04:00
if ( nfs4_set_rw_stateid ( & hdr - > args . stateid , hdr - > args . context ,
hdr - > args . lock_context , FMODE_READ ) = = - EIO )
2014-03-04 21:31:09 +04:00
rpc_exit ( task , - EIO ) ; /* lost lock, terminate I/O */
2011-03-01 04:34:19 +03:00
}
static void filelayout_read_call_done ( struct rpc_task * task , void * data )
{
2014-06-09 19:48:35 +04:00
struct nfs_pgio_header * hdr = data ;
2011-03-01 04:34:19 +03:00
dprintk ( " --> %s task->tk_status %d \n " , __func__ , task - > tk_status ) ;
2014-06-09 19:48:35 +04:00
if ( test_bit ( NFS_IOHDR_REDO , & hdr - > flags ) & &
2014-01-29 20:34:38 +04:00
task - > tk_status = = 0 ) {
2014-06-09 19:48:35 +04:00
nfs41_sequence_done ( task , & hdr - > res . seq_res ) ;
2012-04-28 01:53:48 +04:00
return ;
2014-01-29 20:34:38 +04:00
}
2012-04-28 01:53:48 +04:00
2011-03-01 04:34:19 +03:00
/* Note this may cause RPC to be resent */
2014-06-09 19:48:35 +04:00
hdr - > mds_ops - > rpc_call_done ( task , data ) ;
2011-03-01 04:34:19 +03:00
}
2012-02-17 22:15:24 +04:00
static void filelayout_read_count_stats ( struct rpc_task * task , void * data )
{
2014-06-09 19:48:35 +04:00
struct nfs_pgio_header * hdr = data ;
2012-02-17 22:15:24 +04:00
2014-06-09 19:48:35 +04:00
rpc_count_iostats ( task , NFS_SERVER ( hdr - > inode ) - > client - > cl_metrics ) ;
2012-02-17 22:15:24 +04:00
}
2011-03-01 04:34:19 +03:00
static void filelayout_read_release ( void * data )
{
2014-06-09 19:48:35 +04:00
struct nfs_pgio_header * hdr = data ;
struct pnfs_layout_hdr * lo = hdr - > lseg - > pls_layout ;
2011-03-01 04:34:19 +03:00
2012-10-11 21:43:38 +04:00
filelayout_fenceme ( lo - > plh_inode , lo ) ;
2014-06-09 19:48:35 +04:00
nfs_put_client ( hdr - > ds_clp ) ;
hdr - > mds_ops - > rpc_release ( data ) ;
2011-03-01 04:34:19 +03:00
}
2011-03-03 18:13:47 +03:00
static int filelayout_write_done_cb ( struct rpc_task * task ,
2014-06-09 19:48:35 +04:00
struct nfs_pgio_header * hdr )
2011-03-03 18:13:47 +03:00
{
2012-04-28 01:53:46 +04:00
int err ;
2014-06-09 19:48:35 +04:00
trace_nfs4_pnfs_write ( hdr , task - > tk_status ) ;
err = filelayout_async_handle_error ( task , hdr - > args . context - > state ,
hdr - > ds_clp , hdr - > lseg ) ;
2012-04-28 01:53:46 +04:00
switch ( err ) {
case - NFS4ERR_RESET_TO_MDS :
2014-06-09 19:48:35 +04:00
filelayout_reset_write ( hdr ) ;
2012-04-28 01:53:46 +04:00
return task - > tk_status ;
case - EAGAIN :
2011-10-19 23:17:29 +04:00
rpc_restart_call_prepare ( task ) ;
2011-03-03 18:13:47 +03:00
return - EAGAIN ;
}
2014-06-09 19:48:35 +04:00
filelayout_set_layoutcommit ( hdr ) ;
2011-03-03 18:13:47 +03:00
return 0 ;
}
2011-03-23 16:27:53 +03:00
/* Fake up some data that will cause nfs_commit_release to retry the writes. */
2012-04-20 22:47:39 +04:00
static void prepare_to_resend_writes ( struct nfs_commit_data * data )
2011-03-23 16:27:53 +03:00
{
struct nfs_page * first = nfs_list_entry ( data - > pages . next ) ;
data - > task . tk_status = 0 ;
2012-06-08 19:56:09 +04:00
memcpy ( & data - > verf . verifier , & first - > wb_verf ,
sizeof ( data - > verf . verifier ) ) ;
data - > verf . verifier . data [ 0 ] + + ; /* ensure verifier mismatch */
2011-03-23 16:27:53 +03:00
}
static int filelayout_commit_done_cb ( struct rpc_task * task ,
2012-04-20 22:47:39 +04:00
struct nfs_commit_data * data )
2011-03-23 16:27:53 +03:00
{
2012-04-28 01:53:46 +04:00
int err ;
2013-08-14 23:31:28 +04:00
trace_nfs4_pnfs_commit_ds ( data , task - > tk_status ) ;
2012-04-28 01:53:46 +04:00
err = filelayout_async_handle_error ( task , NULL , data - > ds_clp ,
data - > lseg ) ;
switch ( err ) {
case - NFS4ERR_RESET_TO_MDS :
prepare_to_resend_writes ( data ) ;
return - EAGAIN ;
case - EAGAIN :
rpc_restart_call_prepare ( task ) ;
2011-03-23 16:27:53 +03:00
return - EAGAIN ;
}
return 0 ;
}
2011-03-03 18:13:47 +03:00
static void filelayout_write_prepare ( struct rpc_task * task , void * data )
{
2014-06-09 19:48:35 +04:00
struct nfs_pgio_header * hdr = data ;
2011-03-03 18:13:47 +03:00
2014-06-09 19:48:35 +04:00
if ( unlikely ( test_bit ( NFS_CONTEXT_BAD , & hdr - > args . context - > flags ) ) ) {
2013-03-19 03:45:14 +04:00
rpc_exit ( task , - EIO ) ;
return ;
}
2014-06-09 19:48:35 +04:00
if ( filelayout_reset_to_mds ( hdr - > lseg ) ) {
2012-04-28 01:53:48 +04:00
dprintk ( " %s task %u reset io to MDS \n " , __func__ , task - > tk_pid ) ;
2014-06-09 19:48:35 +04:00
filelayout_reset_write ( hdr ) ;
2012-04-28 01:53:48 +04:00
rpc_exit ( task , 0 ) ;
return ;
}
2014-06-09 19:48:35 +04:00
if ( nfs41_setup_sequence ( hdr - > ds_clp - > cl_session ,
& hdr - > args . seq_args ,
& hdr - > res . seq_res ,
2013-03-17 23:52:00 +04:00
task ) )
return ;
2014-06-09 19:48:35 +04:00
if ( nfs4_set_rw_stateid ( & hdr - > args . stateid , hdr - > args . context ,
hdr - > args . lock_context , FMODE_WRITE ) = = - EIO )
2014-03-04 21:31:09 +04:00
rpc_exit ( task , - EIO ) ; /* lost lock, terminate I/O */
2011-03-03 18:13:47 +03:00
}
static void filelayout_write_call_done ( struct rpc_task * task , void * data )
{
2014-06-09 19:48:35 +04:00
struct nfs_pgio_header * hdr = data ;
2011-03-03 18:13:47 +03:00
2014-06-09 19:48:35 +04:00
if ( test_bit ( NFS_IOHDR_REDO , & hdr - > flags ) & &
2014-01-29 20:34:38 +04:00
task - > tk_status = = 0 ) {
2014-06-09 19:48:35 +04:00
nfs41_sequence_done ( task , & hdr - > res . seq_res ) ;
2012-04-28 01:53:48 +04:00
return ;
2014-01-29 20:34:38 +04:00
}
2012-04-28 01:53:48 +04:00
2011-03-03 18:13:47 +03:00
/* Note this may cause RPC to be resent */
2014-06-09 19:48:35 +04:00
hdr - > mds_ops - > rpc_call_done ( task , data ) ;
2011-03-03 18:13:47 +03:00
}
2012-02-17 22:15:24 +04:00
static void filelayout_write_count_stats ( struct rpc_task * task , void * data )
{
2014-06-09 19:48:35 +04:00
struct nfs_pgio_header * hdr = data ;
2012-02-17 22:15:24 +04:00
2014-06-09 19:48:35 +04:00
rpc_count_iostats ( task , NFS_SERVER ( hdr - > inode ) - > client - > cl_metrics ) ;
2012-02-17 22:15:24 +04:00
}
2011-03-03 18:13:47 +03:00
static void filelayout_write_release ( void * data )
{
2014-06-09 19:48:35 +04:00
struct nfs_pgio_header * hdr = data ;
struct pnfs_layout_hdr * lo = hdr - > lseg - > pls_layout ;
2011-03-03 18:13:47 +03:00
2012-10-11 21:43:38 +04:00
filelayout_fenceme ( lo - > plh_inode , lo ) ;
2014-06-09 19:48:35 +04:00
nfs_put_client ( hdr - > ds_clp ) ;
hdr - > mds_ops - > rpc_release ( data ) ;
2011-03-03 18:13:47 +03:00
}
2012-04-20 22:47:39 +04:00
static void filelayout_commit_prepare ( struct rpc_task * task , void * data )
2011-03-23 16:27:53 +03:00
{
2012-04-20 22:47:39 +04:00
struct nfs_commit_data * wdata = data ;
2011-03-23 16:27:53 +03:00
2012-10-23 04:28:44 +04:00
nfs41_setup_sequence ( wdata - > ds_clp - > cl_session ,
& wdata - > args . seq_args ,
& wdata - > res . seq_res ,
task ) ;
2012-04-20 22:47:39 +04:00
}
static void filelayout_write_commit_done ( struct rpc_task * task , void * data )
{
struct nfs_commit_data * wdata = data ;
/* Note this may cause RPC to be resent */
wdata - > mds_ops - > rpc_call_done ( task , data ) ;
}
static void filelayout_commit_count_stats ( struct rpc_task * task , void * data )
{
struct nfs_commit_data * cdata = data ;
rpc_count_iostats ( task , NFS_SERVER ( cdata - > inode ) - > client - > cl_metrics ) ;
}
static void filelayout_commit_release ( void * calldata )
{
struct nfs_commit_data * data = calldata ;
2012-04-20 22:47:54 +04:00
data - > completion_ops - > completion ( data ) ;
2012-09-19 04:57:08 +04:00
pnfs_put_lseg ( data - > lseg ) ;
2012-04-28 01:53:51 +04:00
nfs_put_client ( data - > ds_clp ) ;
2012-04-20 22:47:39 +04:00
nfs_commitdata_release ( data ) ;
2011-03-23 16:27:53 +03:00
}
2012-03-11 21:11:00 +04:00
static const struct rpc_call_ops filelayout_read_call_ops = {
2011-03-01 04:34:19 +03:00
. rpc_call_prepare = filelayout_read_prepare ,
. rpc_call_done = filelayout_read_call_done ,
2012-02-17 22:15:24 +04:00
. rpc_count_stats = filelayout_read_count_stats ,
2011-03-01 04:34:19 +03:00
. rpc_release = filelayout_read_release ,
} ;
2012-03-11 21:11:00 +04:00
static const struct rpc_call_ops filelayout_write_call_ops = {
2011-03-03 18:13:47 +03:00
. rpc_call_prepare = filelayout_write_prepare ,
. rpc_call_done = filelayout_write_call_done ,
2012-02-17 22:15:24 +04:00
. rpc_count_stats = filelayout_write_count_stats ,
2011-03-03 18:13:47 +03:00
. rpc_release = filelayout_write_release ,
} ;
2012-03-11 21:11:00 +04:00
static const struct rpc_call_ops filelayout_commit_call_ops = {
2012-04-20 22:47:39 +04:00
. rpc_call_prepare = filelayout_commit_prepare ,
. rpc_call_done = filelayout_write_commit_done ,
. rpc_count_stats = filelayout_commit_count_stats ,
2011-03-23 16:27:53 +03:00
. rpc_release = filelayout_commit_release ,
} ;
2011-03-01 04:34:19 +03:00
static enum pnfs_try_status
2014-06-09 19:48:35 +04:00
filelayout_read_pagelist ( struct nfs_pgio_header * hdr )
2011-03-01 04:34:19 +03:00
{
2012-04-20 22:47:44 +04:00
struct pnfs_layout_segment * lseg = hdr - > lseg ;
2011-03-01 04:34:19 +03:00
struct nfs4_pnfs_ds * ds ;
2013-09-06 22:14:00 +04:00
struct rpc_clnt * ds_clnt ;
2014-06-09 19:48:35 +04:00
loff_t offset = hdr - > args . offset ;
2011-03-01 04:34:19 +03:00
u32 j , idx ;
struct nfs_fh * fh ;
dprintk ( " --> %s ino %lu pgbase %u req %Zu@%llu \n " ,
2012-04-20 22:47:44 +04:00
__func__ , hdr - > inode - > i_ino ,
2014-06-09 19:48:35 +04:00
hdr - > args . pgbase , ( size_t ) hdr - > args . count , offset ) ;
2011-03-01 04:34:19 +03:00
/* Retrieve the correct rpc_client for the byte range */
j = nfs4_fl_calc_j_index ( lseg , offset ) ;
idx = nfs4_fl_calc_ds_index ( lseg , j ) ;
ds = nfs4_fl_prepare_ds ( lseg , idx ) ;
2012-04-28 01:53:43 +04:00
if ( ! ds )
2011-03-01 04:34:19 +03:00
return PNFS_NOT_ATTEMPTED ;
2013-09-06 22:14:00 +04:00
ds_clnt = nfs4_find_or_create_ds_client ( ds - > ds_clp , hdr - > inode ) ;
if ( IS_ERR ( ds_clnt ) )
return PNFS_NOT_ATTEMPTED ;
2012-04-28 01:53:51 +04:00
dprintk ( " %s USE DS: %s cl_count %d \n " , __func__ ,
ds - > ds_remotestr , atomic_read ( & ds - > ds_clp - > cl_count ) ) ;
2011-03-01 04:34:19 +03:00
/* No multipath support. Use first DS */
2012-04-28 01:53:51 +04:00
atomic_inc ( & ds - > ds_clp - > cl_count ) ;
2014-06-09 19:48:35 +04:00
hdr - > ds_clp = ds - > ds_clp ;
hdr - > ds_idx = idx ;
2011-03-01 04:34:19 +03:00
fh = nfs4_fl_select_ds_fh ( lseg , j ) ;
if ( fh )
2014-06-09 19:48:35 +04:00
hdr - > args . fh = fh ;
2011-03-01 04:34:19 +03:00
2014-06-09 19:48:35 +04:00
hdr - > args . offset = filelayout_get_dserver_offset ( lseg , offset ) ;
hdr - > mds_offset = offset ;
2011-03-01 04:34:19 +03:00
/* Perform an asynchronous read to ds */
2014-06-09 19:48:35 +04:00
nfs_initiate_pgio ( ds_clnt , hdr ,
2014-05-06 17:12:37 +04:00
& filelayout_read_call_ops , 0 , RPC_TASK_SOFTCONN ) ;
2011-03-01 04:34:19 +03:00
return PNFS_ATTEMPTED ;
}
2011-03-03 18:13:47 +03:00
/* Perform async writes. */
2011-03-03 18:13:45 +03:00
static enum pnfs_try_status
2014-06-09 19:48:35 +04:00
filelayout_write_pagelist ( struct nfs_pgio_header * hdr , int sync )
2011-03-03 18:13:45 +03:00
{
2012-04-20 22:47:44 +04:00
struct pnfs_layout_segment * lseg = hdr - > lseg ;
2011-03-03 18:13:47 +03:00
struct nfs4_pnfs_ds * ds ;
2013-09-06 22:14:00 +04:00
struct rpc_clnt * ds_clnt ;
2014-06-09 19:48:35 +04:00
loff_t offset = hdr - > args . offset ;
2011-03-03 18:13:47 +03:00
u32 j , idx ;
struct nfs_fh * fh ;
/* Retrieve the correct rpc_client for the byte range */
j = nfs4_fl_calc_j_index ( lseg , offset ) ;
idx = nfs4_fl_calc_ds_index ( lseg , j ) ;
ds = nfs4_fl_prepare_ds ( lseg , idx ) ;
2012-04-28 01:53:43 +04:00
if ( ! ds )
2011-03-03 18:13:47 +03:00
return PNFS_NOT_ATTEMPTED ;
2013-09-06 22:14:00 +04:00
ds_clnt = nfs4_find_or_create_ds_client ( ds - > ds_clp , hdr - > inode ) ;
if ( IS_ERR ( ds_clnt ) )
return PNFS_NOT_ATTEMPTED ;
2012-04-28 01:53:51 +04:00
dprintk ( " %s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d \n " ,
2014-06-09 19:48:35 +04:00
__func__ , hdr - > inode - > i_ino , sync , ( size_t ) hdr - > args . count ,
2012-04-28 01:53:51 +04:00
offset , ds - > ds_remotestr , atomic_read ( & ds - > ds_clp - > cl_count ) ) ;
2011-03-03 18:13:47 +03:00
2014-06-09 19:48:35 +04:00
hdr - > pgio_done_cb = filelayout_write_done_cb ;
2012-04-28 01:53:51 +04:00
atomic_inc ( & ds - > ds_clp - > cl_count ) ;
2014-06-09 19:48:35 +04:00
hdr - > ds_clp = ds - > ds_clp ;
hdr - > ds_idx = idx ;
2011-03-03 18:13:47 +03:00
fh = nfs4_fl_select_ds_fh ( lseg , j ) ;
if ( fh )
2014-06-09 19:48:35 +04:00
hdr - > args . fh = fh ;
hdr - > args . offset = filelayout_get_dserver_offset ( lseg , offset ) ;
2011-03-03 18:13:47 +03:00
/* Perform an asynchronous write */
2014-06-09 19:48:35 +04:00
nfs_initiate_pgio ( ds_clnt , hdr ,
2012-04-28 01:53:44 +04:00
& filelayout_write_call_ops , sync ,
RPC_TASK_SOFTCONN ) ;
2011-03-03 18:13:47 +03:00
return PNFS_ATTEMPTED ;
2011-03-03 18:13:45 +03:00
}
2010-10-20 08:18:04 +04:00
/*
 * filelayout_check_layout()
 *
 * Make sure layout segment parameters are sane WRT the device.
 * At this point no generic layer initialization of the lseg has occurred,
 * and nothing has been added to the layout_hdr cache.
 *
 * Looks up the device id (fetching the device info if it is not cached)
 * and stores the resulting dsaddr in fl->dsaddr.  Returns 0 when all checks
 * pass and -EINVAL otherwise.  When a check fails after the device was
 * obtained, the device reference is put again before returning.
 */
static int
filelayout_check_layout(struct pnfs_layout_hdr *lo,
			struct nfs4_filelayout_segment *fl,
			struct nfs4_layoutget_res *lgr,
			struct nfs4_deviceid *id,
			gfp_t gfp_flags)
{
	struct nfs4_file_layout_dsaddr *dsaddr;
	struct nfs4_deviceid_node *node;
	int status = -EINVAL;

	dprintk(" --> %s \n ", __func__);

	/* FIXME: remove this check when layout segment support is added */
	if (lgr->range.offset != 0 ||
	    lgr->range.length != NFS4_MAX_UINT64) {
		dprintk(" %s Only whole file layouts supported. Use MDS i/o \n ",
			__func__);
		goto out;
	}

	if (fl->pattern_offset > lgr->range.offset) {
		dprintk(" %s pattern_offset %lld too large \n ",
			__func__, fl->pattern_offset);
		goto out;
	}

	if (!fl->stripe_unit) {
		dprintk(" %s Invalid stripe unit (%u) \n ",
			__func__, fl->stripe_unit);
		goto out;
	}

	/* find and reference the deviceid */
	node = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode)->pnfs_curr_ld,
				      NFS_SERVER(lo->plh_inode)->nfs_client, id);
	if (node != NULL) {
		dsaddr = container_of(node, struct nfs4_file_layout_dsaddr,
				      id_node);
	} else {
		dsaddr = filelayout_get_device_info(lo->plh_inode, id,
						    lo->plh_lc_cred, gfp_flags);
		if (dsaddr == NULL)
			goto out;
	}

	/* Found deviceid is unavailable */
	if (filelayout_test_devid_unavailable(&dsaddr->id_node))
		goto out_put;

	fl->dsaddr = dsaddr;

	if (fl->first_stripe_index >= dsaddr->stripe_count) {
		dprintk(" %s Bad first_stripe_index %u \n ",
			__func__, fl->first_stripe_index);
		goto out_put;
	}

	if ((fl->stripe_type == STRIPE_SPARSE &&
	     fl->num_fh > 1 && fl->num_fh != dsaddr->ds_num) ||
	    (fl->stripe_type == STRIPE_DENSE &&
	     fl->num_fh != dsaddr->stripe_count)) {
		dprintk(" %s num_fh %u not valid for given packing \n ",
			__func__, fl->num_fh);
		goto out_put;
	}

	status = 0;
out:
	dprintk(" --> %s returns %d \n ", __func__, status);
	return status;
out_put:
	nfs4_fl_put_deviceid(dsaddr);
	goto out;
}
static void filelayout_free_fh_array ( struct nfs4_filelayout_segment * fl )
{
int i ;
for ( i = 0 ; i < fl - > num_fh ; i + + ) {
if ( ! fl - > fh_array [ i ] )
break ;
kfree ( fl - > fh_array [ i ] ) ;
}
kfree ( fl - > fh_array ) ;
fl - > fh_array = NULL ;
}
/*
 * Release a file layout segment: first its file handle array, then the
 * segment structure itself.  The device reference is not touched here.
 */
static void
_filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
{
	filelayout_free_fh_array(fl);
	kfree(fl);
}
/*
 * Decode the opaque file layout body returned by LAYOUTGET into @fl.
 *
 * @flo:       layout header (not used by the decoder itself)
 * @fl:        segment to fill in: commit_through_mds, stripe_type,
 *             stripe_unit, first_stripe_index, pattern_offset, num_fh and
 *             the fh_array of per-stripe file handles
 * @lgr:       LAYOUTGET result whose layoutp pages hold the XDR data
 * @id:        receives the device id found at the start of the body
 * @gfp_flags: allocation flags for the scratch page and file handles
 *
 * Returns 0 on success, -ENOMEM if the scratch page cannot be allocated and
 * -EIO on any decoding or allocation failure after that.  On the -EIO paths
 * taken once the handle array exists, the array is freed again; fl->num_fh
 * keeps the decoded count.
 */
static int
filelayout_decode_layout(struct pnfs_layout_hdr *flo,
			 struct nfs4_filelayout_segment *fl,
			 struct nfs4_layoutget_res *lgr,
			 struct nfs4_deviceid *id,
			 gfp_t gfp_flags)
{
	struct xdr_stream stream;
	struct xdr_buf buf;
	struct page *scratch;
	__be32 *p;
	uint32_t nfl_util;
	int i;

	dprintk(" %s: set_layout_map Begin \n ", __func__);

	scratch = alloc_page(gfp_flags);
	if (!scratch)
		return -ENOMEM;

	xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages,
			      lgr->layoutp->len);
	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);

	/* 20 = nfl_util (4), first_stripe_index (4), pattern_offset (8),
	 * num_fh (4) */
	p = xdr_inline_decode(&stream, NFS4_DEVICEID4_SIZE + 20);
	if (unlikely(!p))
		goto out_err;

	memcpy(id, p, sizeof(*id));
	p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
	nfs4_print_deviceid(id);

	/* nfl_util carries the flag bits and the stripe unit in one word. */
	nfl_util = be32_to_cpup(p++);
	if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS)
		fl->commit_through_mds = 1;
	if (nfl_util & NFL4_UFLG_DENSE)
		fl->stripe_type = STRIPE_DENSE;
	else
		fl->stripe_type = STRIPE_SPARSE;
	fl->stripe_unit = nfl_util & ~NFL4_UFLG_MASK;

	fl->first_stripe_index = be32_to_cpup(p++);
	p = xdr_decode_hyper(p, &fl->pattern_offset);
	fl->num_fh = be32_to_cpup(p++);

	dprintk(" %s: nfl_util 0x%X num_fh %u fsi %u po %llu \n ",
		__func__, nfl_util, fl->num_fh, fl->first_stripe_index,
		fl->pattern_offset);

	/* Note that a zero value for num_fh is legal for STRIPE_SPARSE.
	 * Further checking is done in filelayout_check_layout */
	if (fl->num_fh >
	    max(NFS4_PNFS_MAX_STRIPE_CNT, NFS4_PNFS_MAX_MULTI_CNT))
		goto out_err;

	if (fl->num_fh > 0) {
		fl->fh_array = kcalloc(fl->num_fh, sizeof(fl->fh_array[0]),
				       gfp_flags);
		if (!fl->fh_array)
			goto out_err;
	}

	for (i = 0; i < fl->num_fh; i++) {
		/* Do we want to use a mempool here? */
		fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), gfp_flags);
		if (!fl->fh_array[i])
			goto out_err_free;

		p = xdr_inline_decode(&stream, 4);
		if (unlikely(!p))
			goto out_err_free;
		fl->fh_array[i]->size = be32_to_cpup(p++);
		/*
		 * The handle bytes are copied into ->data below, so the
		 * length must be bounded by the size of that buffer.
		 * sizeof(struct nfs_fh) also counts the size field and
		 * would let the copy run past the end of ->data.
		 */
		if (fl->fh_array[i]->size > sizeof(fl->fh_array[i]->data)) {
			printk(KERN_ERR " NFS: Too big fh %d received %d \n ",
			       i, fl->fh_array[i]->size);
			goto out_err_free;
		}

		p = xdr_inline_decode(&stream, fl->fh_array[i]->size);
		if (unlikely(!p))
			goto out_err_free;
		memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size);
		dprintk(" DEBUG: %s: fh len %d \n ", __func__,
			fl->fh_array[i]->size);
	}

	__free_page(scratch);
	return 0;

out_err_free:
	filelayout_free_fh_array(fl);
out_err:
	__free_page(scratch);
	return -EIO;
}
2011-03-23 16:27:49 +03:00
static void
filelayout_free_lseg ( struct pnfs_layout_segment * lseg )
{
struct nfs4_filelayout_segment * fl = FILELAYOUT_LSEG ( lseg ) ;
dprintk ( " --> %s \n " , __func__ ) ;
nfs4_fl_put_deviceid ( fl - > dsaddr ) ;
2012-04-20 22:47:38 +04:00
/* This assumes a single RW lseg */
if ( lseg - > pls_range . iomode = = IOMODE_RW ) {
struct nfs4_filelayout * flo ;
flo = FILELAYOUT_FROM_HDR ( lseg - > pls_layout ) ;
flo - > commit_info . nbuckets = 0 ;
kfree ( flo - > commit_info . buckets ) ;
flo - > commit_info . buckets = NULL ;
}
2011-03-23 16:27:49 +03:00
_filelayout_free_lseg ( fl ) ;
}
2012-04-20 22:47:38 +04:00
static int
filelayout_alloc_commit_info ( struct pnfs_layout_segment * lseg ,
2012-04-20 22:47:53 +04:00
struct nfs_commit_info * cinfo ,
2012-04-20 22:47:38 +04:00
gfp_t gfp_flags )
{
struct nfs4_filelayout_segment * fl = FILELAYOUT_LSEG ( lseg ) ;
2012-04-20 22:47:53 +04:00
struct pnfs_commit_bucket * buckets ;
2014-05-15 19:56:49 +04:00
int size , i ;
2012-04-20 22:47:38 +04:00
if ( fl - > commit_through_mds )
return 0 ;
2014-05-15 19:56:49 +04:00
size = ( fl - > stripe_type = = STRIPE_SPARSE ) ?
fl - > dsaddr - > ds_num : fl - > dsaddr - > stripe_count ;
if ( cinfo - > ds - > nbuckets > = size ) {
2012-04-20 22:47:38 +04:00
/* This assumes there is only one IOMODE_RW lseg. What
* we really want to do is have a layout_hdr level
* dictionary of < multipath_list4 , fh > keys , each
* associated with a struct list_head , populated by calls
* to filelayout_write_pagelist ( ) .
* */
return 0 ;
}
2012-04-20 22:47:53 +04:00
buckets = kcalloc ( size , sizeof ( struct pnfs_commit_bucket ) ,
2012-04-20 22:47:38 +04:00
gfp_flags ) ;
if ( ! buckets )
return - ENOMEM ;
2014-05-15 19:56:49 +04:00
for ( i = 0 ; i < size ; i + + ) {
INIT_LIST_HEAD ( & buckets [ i ] . written ) ;
INIT_LIST_HEAD ( & buckets [ i ] . committing ) ;
2014-05-15 19:56:54 +04:00
/* mark direct verifier as unset */
buckets [ i ] . direct_verf . committed = NFS_INVALID_STABLE_HOW ;
2012-04-20 22:47:38 +04:00
}
2014-05-15 19:56:49 +04:00
spin_lock ( cinfo - > lock ) ;
if ( cinfo - > ds - > nbuckets > = size )
goto out ;
for ( i = 0 ; i < cinfo - > ds - > nbuckets ; i + + ) {
list_splice ( & cinfo - > ds - > buckets [ i ] . written ,
& buckets [ i ] . written ) ;
list_splice ( & cinfo - > ds - > buckets [ i ] . committing ,
& buckets [ i ] . committing ) ;
2014-05-15 19:56:54 +04:00
buckets [ i ] . direct_verf . committed =
cinfo - > ds - > buckets [ i ] . direct_verf . committed ;
2014-05-15 19:56:49 +04:00
buckets [ i ] . wlseg = cinfo - > ds - > buckets [ i ] . wlseg ;
buckets [ i ] . clseg = cinfo - > ds - > buckets [ i ] . clseg ;
}
swap ( cinfo - > ds - > buckets , buckets ) ;
cinfo - > ds - > nbuckets = size ;
out :
spin_unlock ( cinfo - > lock ) ;
kfree ( buckets ) ;
return 0 ;
2012-04-20 22:47:38 +04:00
}
2010-10-20 08:18:04 +04:00
/*
 * filelayout_alloc_lseg - build a file layout segment from a LAYOUTGET result.
 * @layoutid:  layout header the segment is created for
 * @lgr:       LAYOUTGET result to decode
 * @gfp_flags: allocation flags
 *
 * Allocates a zeroed segment, decodes the layout into it and then runs
 * filelayout_check_layout() on it.  If either step fails the segment is
 * freed again.
 *
 * Returns the embedded generic segment header, or NULL on any failure.
 */
static struct pnfs_layout_segment *
filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
		      struct nfs4_layoutget_res *lgr,
		      gfp_t gfp_flags)
{
	struct nfs4_filelayout_segment *flseg;
	struct nfs4_deviceid id;

	dprintk(" --> %s \n ", __func__);

	flseg = kzalloc(sizeof(*flseg), gfp_flags);
	if (flseg == NULL)
		return NULL;

	if (filelayout_decode_layout(layoutid, flseg, lgr, &id, gfp_flags) != 0)
		goto out_free;
	/* Only reached when decoding succeeded. */
	if (filelayout_check_layout(layoutid, flseg, lgr, &id, gfp_flags))
		goto out_free;

	return &flseg->generic_hdr;

out_free:
	_filelayout_free_lseg(flseg);
	return NULL;
}
2011-03-01 04:34:14 +03:00
/*
* filelayout_pg_test ( ) . Called by nfs_can_coalesce_requests ( )
*
2014-05-15 19:56:43 +04:00
* Return 0 if @ req cannot be coalesced into @ pgio , otherwise return the number
* of bytes ( maximum @ req - > wb_bytes ) that can be coalesced .
2011-03-01 04:34:14 +03:00
*/
2014-05-15 19:56:43 +04:00
static size_t
2011-03-01 04:34:14 +03:00
filelayout_pg_test ( struct nfs_pageio_descriptor * pgio , struct nfs_page * prev ,
struct nfs_page * req )
{
2014-05-15 19:56:51 +04:00
unsigned int size ;
2011-03-01 04:34:14 +03:00
u64 p_stripe , r_stripe ;
2014-05-15 19:56:56 +04:00
u32 stripe_offset ;
u64 segment_offset = pgio - > pg_lseg - > pls_range . offset ;
u32 stripe_unit = FILELAYOUT_LSEG ( pgio - > pg_lseg ) - > stripe_unit ;
2011-03-01 04:34:14 +03:00
2014-05-15 19:56:51 +04:00
/* calls nfs_generic_pg_test */
size = pnfs_generic_pg_test ( pgio , prev , req ) ;
if ( ! size )
2014-05-15 19:56:43 +04:00
return 0 ;
2011-05-25 21:54:40 +04:00
2014-05-15 19:56:56 +04:00
/* see if req and prev are in the same stripe */
2014-05-15 19:56:51 +04:00
if ( prev ) {
2014-05-15 19:56:56 +04:00
p_stripe = ( u64 ) req_offset ( prev ) - segment_offset ;
r_stripe = ( u64 ) req_offset ( req ) - segment_offset ;
2014-05-15 19:56:51 +04:00
do_div ( p_stripe , stripe_unit ) ;
do_div ( r_stripe , stripe_unit ) ;
2011-03-01 04:34:14 +03:00
2014-05-15 19:56:51 +04:00
if ( p_stripe ! = r_stripe )
return 0 ;
}
2014-05-15 19:56:56 +04:00
/* calculate remaining bytes in the current stripe */
div_u64_rem ( ( u64 ) req_offset ( req ) - segment_offset ,
stripe_unit ,
& stripe_offset ) ;
WARN_ON_ONCE ( stripe_offset > stripe_unit ) ;
if ( stripe_offset > = stripe_unit )
return 0 ;
return min ( stripe_unit - ( unsigned int ) stripe_offset , size ) ;
2011-03-01 04:34:14 +03:00
}
2012-03-11 21:11:00 +04:00
static void
2011-06-14 02:22:38 +04:00
filelayout_pg_init_read ( struct nfs_pageio_descriptor * pgio ,
struct nfs_page * req )
{
2014-05-15 19:56:56 +04:00
if ( ! pgio - > pg_lseg )
pgio - > pg_lseg = pnfs_update_layout ( pgio - > pg_inode ,
2011-06-14 02:22:38 +04:00
req - > wb_context ,
0 ,
NFS4_MAX_UINT64 ,
IOMODE_READ ,
GFP_KERNEL ) ;
/* If no lseg, fall back to read through mds */
if ( pgio - > pg_lseg = = NULL )
2011-07-13 23:59:57 +04:00
nfs_pageio_reset_read_mds ( pgio ) ;
2011-06-14 02:22:38 +04:00
}
2012-03-11 21:11:00 +04:00
static void
2011-06-14 02:22:38 +04:00
filelayout_pg_init_write ( struct nfs_pageio_descriptor * pgio ,
struct nfs_page * req )
{
2012-04-20 22:47:53 +04:00
struct nfs_commit_info cinfo ;
2012-04-20 22:47:38 +04:00
int status ;
2014-05-15 19:56:56 +04:00
if ( ! pgio - > pg_lseg )
pgio - > pg_lseg = pnfs_update_layout ( pgio - > pg_inode ,
2011-06-14 02:22:38 +04:00
req - > wb_context ,
0 ,
NFS4_MAX_UINT64 ,
IOMODE_RW ,
GFP_NOFS ) ;
/* If no lseg, fall back to write through mds */
if ( pgio - > pg_lseg = = NULL )
2012-04-20 22:47:38 +04:00
goto out_mds ;
2012-04-20 22:47:53 +04:00
nfs_init_cinfo ( & cinfo , pgio - > pg_inode , pgio - > pg_dreq ) ;
status = filelayout_alloc_commit_info ( pgio - > pg_lseg , & cinfo , GFP_NOFS ) ;
2012-04-20 22:47:38 +04:00
if ( status < 0 ) {
2012-09-19 04:57:08 +04:00
pnfs_put_lseg ( pgio - > pg_lseg ) ;
2012-04-20 22:47:38 +04:00
pgio - > pg_lseg = NULL ;
goto out_mds ;
}
return ;
out_mds :
nfs_pageio_reset_write_mds ( pgio ) ;
2011-06-14 02:22:38 +04:00
}
2011-06-10 21:30:23 +04:00
static const struct nfs_pageio_ops filelayout_pg_read_ops = {
2011-06-14 02:22:38 +04:00
. pg_init = filelayout_pg_init_read ,
2011-06-10 21:30:23 +04:00
. pg_test = filelayout_pg_test ,
2011-07-13 23:58:28 +04:00
. pg_doio = pnfs_generic_pg_readpages ,
2011-06-10 21:30:23 +04:00
} ;
static const struct nfs_pageio_ops filelayout_pg_write_ops = {
2011-06-14 02:22:38 +04:00
. pg_init = filelayout_pg_init_write ,
2011-06-10 21:30:23 +04:00
. pg_test = filelayout_pg_test ,
2011-07-13 23:59:19 +04:00
. pg_doio = pnfs_generic_pg_writepages ,
2011-06-10 21:30:23 +04:00
} ;
2011-03-23 16:27:53 +03:00
/*
 * select_bucket_index - map index @j to a commit bucket index.
 *
 * For STRIPE_SPARSE layouts the bucket index is whatever
 * nfs4_fl_calc_ds_index() yields for @j; for every other stripe type
 * @j is used unchanged.
 */
static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
{
	if (fl->stripe_type != STRIPE_SPARSE)
		return j;

	return nfs4_fl_calc_ds_index(&fl->generic_hdr, j);
}
2012-03-09 02:29:35 +04:00
/* The generic layer is about to remove the req from the commit list.
* If this will make the bucket empty , it will need to put the lseg reference .
2014-07-18 04:42:19 +04:00
* Note this is must be called holding the inode ( / cinfo ) lock
2012-03-09 02:29:35 +04:00
*/
2012-03-16 01:16:40 +04:00
static void
2012-04-20 22:47:53 +04:00
filelayout_clear_request_commit ( struct nfs_page * req ,
struct nfs_commit_info * cinfo )
2012-03-09 02:29:35 +04:00
{
2012-03-16 01:16:40 +04:00
struct pnfs_layout_segment * freeme = NULL ;
if ( ! test_and_clear_bit ( PG_COMMIT_TO_DS , & req - > wb_flags ) )
goto out ;
2012-04-20 22:47:53 +04:00
cinfo - > ds - > nwritten - - ;
2012-03-09 02:29:35 +04:00
if ( list_is_singular ( & req - > wb_list ) ) {
2012-04-20 22:47:53 +04:00
struct pnfs_commit_bucket * bucket ;
2012-03-09 02:29:35 +04:00
2012-04-20 22:47:38 +04:00
bucket = list_first_entry ( & req - > wb_list ,
2012-04-20 22:47:53 +04:00
struct pnfs_commit_bucket ,
2012-04-20 22:47:38 +04:00
written ) ;
freeme = bucket - > wlseg ;
bucket - > wlseg = NULL ;
2012-03-09 02:29:35 +04:00
}
2012-03-16 01:16:40 +04:00
out :
2012-04-20 22:47:53 +04:00
nfs_request_remove_commit_list ( req , cinfo ) ;
2014-07-18 04:42:19 +04:00
pnfs_put_lseg_async ( freeme ) ;
2012-03-09 02:29:35 +04:00
}
2014-07-03 09:07:45 +04:00
static void
filelayout_mark_request_commit ( struct nfs_page * req ,
struct pnfs_layout_segment * lseg ,
struct nfs_commit_info * cinfo )
2011-03-23 16:27:53 +03:00
{
struct nfs4_filelayout_segment * fl = FILELAYOUT_LSEG ( lseg ) ;
u32 i , j ;
struct list_head * list ;
2012-04-20 22:47:53 +04:00
struct pnfs_commit_bucket * buckets ;
2011-03-23 16:27:53 +03:00
2014-07-03 09:07:45 +04:00
if ( fl - > commit_through_mds ) {
list = & cinfo - > mds - > list ;
spin_lock ( cinfo - > lock ) ;
goto mds_commit ;
}
2012-03-09 02:29:35 +04:00
2011-03-23 16:27:53 +03:00
/* Note that we are calling nfs4_fl_calc_j_index on each page
* that ends up being committed to a data server . An attractive
* alternative is to add a field to nfs_write_data and nfs_page
* to store the value calculated in filelayout_write_pagelist
* and just use that here .
*/
2012-04-20 22:47:43 +04:00
j = nfs4_fl_calc_j_index ( lseg , req_offset ( req ) ) ;
2011-03-23 16:27:53 +03:00
i = select_bucket_index ( fl , j ) ;
2014-05-15 19:56:40 +04:00
spin_lock ( cinfo - > lock ) ;
2012-04-20 22:47:53 +04:00
buckets = cinfo - > ds - > buckets ;
2012-04-20 22:47:38 +04:00
list = & buckets [ i ] . written ;
2011-03-23 16:27:53 +03:00
if ( list_empty ( list ) ) {
2012-03-09 02:29:35 +04:00
/* Non-empty buckets hold a reference on the lseg. That ref
* is normally transferred to the COMMIT call and released
* there . It could also be released if the last req is pulled
* off due to a rewrite , in which case it will be done in
2012-04-20 22:47:38 +04:00
* filelayout_clear_request_commit
2012-03-09 02:29:35 +04:00
*/
2012-09-19 04:57:08 +04:00
buckets [ i ] . wlseg = pnfs_get_lseg ( lseg ) ;
2011-03-23 16:27:53 +03:00
}
2012-03-16 01:16:40 +04:00
set_bit ( PG_COMMIT_TO_DS , & req - > wb_flags ) ;
2012-04-20 22:47:53 +04:00
cinfo - > ds - > nwritten + + ;
2011-03-23 16:27:53 +03:00
2014-07-03 09:07:45 +04:00
mds_commit :
/* nfs_request_add_commit_list(). We need to add req to list without
* dropping cinfo lock .
*/
set_bit ( PG_CLEAN , & ( req ) - > wb_flags ) ;
nfs_list_add_request ( req , list ) ;
cinfo - > mds - > ncommit + + ;
spin_unlock ( cinfo - > lock ) ;
if ( ! cinfo - > dreq ) {
inc_zone_page_state ( req - > wb_page , NR_UNSTABLE_NFS ) ;
inc_bdi_stat ( page_file_mapping ( req - > wb_page ) - > backing_dev_info ,
BDI_RECLAIMABLE ) ;
__mark_inode_dirty ( req - > wb_context - > dentry - > d_inode ,
I_DIRTY_DATASYNC ) ;
}
2012-03-16 01:16:40 +04:00
}
2011-03-23 16:27:53 +03:00
/*
 * calc_ds_index_from_commit - turn commit bucket index @i into a DS index.
 *
 * For STRIPE_SPARSE layouts @i is returned unchanged; otherwise it is
 * translated with nfs4_fl_calc_ds_index().
 */
static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
{
	if (FILELAYOUT_LSEG(lseg)->stripe_type == STRIPE_SPARSE)
		return i;

	return nfs4_fl_calc_ds_index(lseg, i);
}
/*
 * select_ds_fh_from_commit - pick the filehandle to use for commit index @i.
 *
 * For STRIPE_SPARSE layouts a single filehandle is shared by every index,
 * and an empty filehandle array yields NULL so that the caller keeps the
 * filehandle already set in the request.  In all remaining cases entry @i
 * of the segment's fh_array is returned.
 */
static struct nfs_fh *
select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i)
{
	struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);

	if (flseg->stripe_type == STRIPE_SPARSE) {
		switch (flseg->num_fh) {
		case 0:
			/* Use the MDS OPEN fh set in nfs_read_rpcsetup */
			return NULL;
		case 1:
			i = 0;
			break;
		default:
			break;
		}
	}

	return flseg->fh_array[i];
}
2012-04-20 22:47:39 +04:00
static int filelayout_initiate_commit ( struct nfs_commit_data * data , int how )
2011-03-23 16:27:53 +03:00
{
struct pnfs_layout_segment * lseg = data - > lseg ;
struct nfs4_pnfs_ds * ds ;
2013-09-06 22:14:00 +04:00
struct rpc_clnt * ds_clnt ;
2011-03-23 16:27:53 +03:00
u32 idx ;
struct nfs_fh * fh ;
idx = calc_ds_index_from_commit ( lseg , data - > ds_commit_index ) ;
ds = nfs4_fl_prepare_ds ( lseg , idx ) ;
2013-09-06 22:14:00 +04:00
if ( ! ds )
goto out_err ;
ds_clnt = nfs4_find_or_create_ds_client ( ds - > ds_clp , data - > inode ) ;
if ( IS_ERR ( ds_clnt ) )
goto out_err ;
2012-04-28 01:53:51 +04:00
dprintk ( " %s ino %lu, how %d cl_count %d \n " , __func__ ,
data - > inode - > i_ino , how , atomic_read ( & ds - > ds_clp - > cl_count ) ) ;
2012-04-20 22:47:39 +04:00
data - > commit_done_cb = filelayout_commit_done_cb ;
2012-04-28 01:53:51 +04:00
atomic_inc ( & ds - > ds_clp - > cl_count ) ;
2011-03-23 16:27:53 +03:00
data - > ds_clp = ds - > ds_clp ;
fh = select_ds_fh_from_commit ( lseg , data - > ds_commit_index ) ;
if ( fh )
data - > args . fh = fh ;
2013-09-06 22:14:00 +04:00
return nfs_initiate_commit ( ds_clnt , data ,
2012-04-28 01:53:44 +04:00
& filelayout_commit_call_ops , how ,
RPC_TASK_SOFTCONN ) ;
2013-09-06 22:14:00 +04:00
out_err :
prepare_to_resend_writes ( data ) ;
filelayout_commit_release ( data ) ;
return - EAGAIN ;
2011-03-23 16:27:53 +03:00
}
2012-03-16 01:16:40 +04:00
static int
2012-04-20 22:47:57 +04:00
transfer_commit_list ( struct list_head * src , struct list_head * dst ,
struct nfs_commit_info * cinfo , int max )
2012-03-16 01:16:40 +04:00
{
struct nfs_page * req , * tmp ;
int ret = 0 ;
list_for_each_entry_safe ( req , tmp , src , wb_list ) {
if ( ! nfs_lock_request ( req ) )
continue ;
2012-05-23 00:36:27 +04:00
kref_get ( & req - > wb_kref ) ;
2012-04-20 22:47:53 +04:00
if ( cond_resched_lock ( cinfo - > lock ) )
2012-03-17 19:59:30 +04:00
list_safe_reset_next ( req , tmp , wb_list ) ;
2012-04-20 22:47:53 +04:00
nfs_request_remove_commit_list ( req , cinfo ) ;
2012-03-16 01:16:40 +04:00
clear_bit ( PG_COMMIT_TO_DS , & req - > wb_flags ) ;
nfs_list_add_request ( req , dst ) ;
ret + + ;
2012-04-20 22:47:57 +04:00
if ( ( ret = = max ) & & ! cinfo - > dreq )
2012-03-16 01:16:40 +04:00
break ;
}
2012-04-20 22:47:57 +04:00
return ret ;
}
2014-05-15 19:56:40 +04:00
/* Note called with cinfo->lock held. */
2012-04-20 22:47:57 +04:00
static int
filelayout_scan_ds_commit_list ( struct pnfs_commit_bucket * bucket ,
struct nfs_commit_info * cinfo ,
int max )
{
struct list_head * src = & bucket - > written ;
struct list_head * dst = & bucket - > committing ;
int ret ;
ret = transfer_commit_list ( src , dst , cinfo , max ) ;
2012-04-20 22:47:38 +04:00
if ( ret ) {
2012-04-20 22:47:53 +04:00
cinfo - > ds - > nwritten - = ret ;
cinfo - > ds - > ncommitting + = ret ;
2012-04-20 22:47:38 +04:00
bucket - > clseg = bucket - > wlseg ;
if ( list_empty ( src ) )
bucket - > wlseg = NULL ;
else
2012-09-19 04:57:08 +04:00
pnfs_get_lseg ( bucket - > clseg ) ;
2012-04-20 22:47:38 +04:00
}
2012-03-16 01:16:40 +04:00
return ret ;
}
2012-03-09 02:29:35 +04:00
/* Move reqs from written to committing lists, returning count of number moved.
2012-04-20 22:47:53 +04:00
* Note called with cinfo - > lock held .
2012-03-09 02:29:35 +04:00
*/
2012-04-20 22:47:53 +04:00
static int filelayout_scan_commit_lists ( struct nfs_commit_info * cinfo ,
int max )
2012-03-09 02:29:35 +04:00
{
int i , rv = 0 , cnt ;
2012-04-20 22:47:53 +04:00
for ( i = 0 ; i < cinfo - > ds - > nbuckets & & max ! = 0 ; i + + ) {
cnt = filelayout_scan_ds_commit_list ( & cinfo - > ds - > buckets [ i ] ,
cinfo , max ) ;
2012-03-09 02:29:35 +04:00
max - = cnt ;
rv + = cnt ;
}
return rv ;
}
2012-04-20 22:47:57 +04:00
/* Pull everything off the committing lists and dump into @dst */
static void filelayout_recover_commit_reqs ( struct list_head * dst ,
struct nfs_commit_info * cinfo )
{
struct pnfs_commit_bucket * b ;
2014-05-15 19:56:40 +04:00
struct pnfs_layout_segment * freeme ;
2012-04-20 22:47:57 +04:00
int i ;
2014-05-15 19:56:40 +04:00
restart :
pnfs: fix BUG in filelayout_recover_commit_reqs
cond_resched_lock(cinfo->lock) is called everywhere else while holding
the cinfo->lock spinlock. Not holding this lock while calling
transfer_commit_list in filelayout_recover_commit_reqs causes the BUG
below.
It's true that we can't hold this lock while calling pnfs_put_lseg,
because that might try to lock the inode lock - which might be the
same lock as cinfo->lock.
To reproduce, mount a 2 DS pynfs server and run an O_DIRECT command
that crosses a stripe boundary and is not page aligned, such as:
dd if=/dev/zero of=/mnt/f bs=17000 count=1 oflag=direct
BUG: sleeping function called from invalid context at linux/fs/nfs/nfs4filelayout.c:1161
in_atomic(): 0, irqs_disabled(): 0, pid: 27, name: kworker/0:1
2 locks held by kworker/0:1/27:
#0: (events){.+.+.+}, at: [<ffffffff810501d7>] process_one_work+0x175/0x3a5
#1: ((&dreq->work)){+.+...}, at: [<ffffffff810501d7>] process_one_work+0x175/0x3a5
CPU: 0 PID: 27 Comm: kworker/0:1 Not tainted 3.13.0-rc3-branch-dros_testing+ #21
Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/31/2013
Workqueue: events nfs_direct_write_schedule_work [nfs]
0000000000000000 ffff88007a39bbb8 ffffffff81491256 ffff88007b87a130 ffff88007a39bbd8 ffffffff8105f103 ffff880079614000 ffff880079617d40 ffff88007a39bc20 ffffffffa011603e ffff880078988b98 0000000000000000
Call Trace:
[<ffffffff81491256>] dump_stack+0x4d/0x66
[<ffffffff8105f103>] __might_sleep+0x100/0x105
[<ffffffffa011603e>] transfer_commit_list+0x94/0xf1 [nfs_layout_nfsv41_files]
[<ffffffffa01160d6>] filelayout_recover_commit_reqs+0x3b/0x68 [nfs_layout_nfsv41_files]
[<ffffffffa00ba53a>] nfs_direct_write_reschedule+0x9f/0x1d6 [nfs]
[<ffffffff810705df>] ? mark_lock+0x1df/0x224
[<ffffffff8106e617>] ? trace_hardirqs_off_caller+0x37/0xa4
[<ffffffff8106e691>] ? trace_hardirqs_off+0xd/0xf
[<ffffffffa00ba8f8>] nfs_direct_write_schedule_work+0x9d/0xb7 [nfs]
[<ffffffff810501d7>] ? process_one_work+0x175/0x3a5
[<ffffffff81050258>] process_one_work+0x1f6/0x3a5
[<ffffffff810501d7>] ? process_one_work+0x175/0x3a5
[<ffffffff8105187e>] worker_thread+0x149/0x1f5
[<ffffffff81051735>] ? rescuer_thread+0x28d/0x28d
[<ffffffff81056d74>] kthread+0xd2/0xda
[<ffffffff81056ca2>] ? __kthread_parkme+0x61/0x61
[<ffffffff8149e66c>] ret_from_fork+0x7c/0xb0
[<ffffffff81056ca2>] ? __kthread_parkme+0x61/0x61
Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
2014-01-22 00:21:33 +04:00
spin_lock ( cinfo - > lock ) ;
2012-04-20 22:47:57 +04:00
for ( i = 0 , b = cinfo - > ds - > buckets ; i < cinfo - > ds - > nbuckets ; i + + , b + + ) {
if ( transfer_commit_list ( & b - > written , dst , cinfo , 0 ) ) {
2014-05-15 19:56:40 +04:00
freeme = b - > wlseg ;
2012-04-20 22:47:57 +04:00
b - > wlseg = NULL ;
2014-05-15 19:56:40 +04:00
spin_unlock ( cinfo - > lock ) ;
pnfs_put_lseg ( freeme ) ;
goto restart ;
2012-04-20 22:47:57 +04:00
}
}
cinfo - > ds - > nwritten = 0 ;
pnfs: fix BUG in filelayout_recover_commit_reqs
cond_resched_lock(cinfo->lock) is called everywhere else while holding
the cinfo->lock spinlock. Not holding this lock while calling
transfer_commit_list in filelayout_recover_commit_reqs causes the BUG
below.
It's true that we can't hold this lock while calling pnfs_put_lseg,
because that might try to lock the inode lock - which might be the
same lock as cinfo->lock.
To reproduce, mount a 2 DS pynfs server and run an O_DIRECT command
that crosses a stripe boundary and is not page aligned, such as:
dd if=/dev/zero of=/mnt/f bs=17000 count=1 oflag=direct
BUG: sleeping function called from invalid context at linux/fs/nfs/nfs4filelayout.c:1161
in_atomic(): 0, irqs_disabled(): 0, pid: 27, name: kworker/0:1
2 locks held by kworker/0:1/27:
#0: (events){.+.+.+}, at: [<ffffffff810501d7>] process_one_work+0x175/0x3a5
#1: ((&dreq->work)){+.+...}, at: [<ffffffff810501d7>] process_one_work+0x175/0x3a5
CPU: 0 PID: 27 Comm: kworker/0:1 Not tainted 3.13.0-rc3-branch-dros_testing+ #21
Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/31/2013
Workqueue: events nfs_direct_write_schedule_work [nfs]
0000000000000000 ffff88007a39bbb8 ffffffff81491256 ffff88007b87a130 ffff88007a39bbd8 ffffffff8105f103 ffff880079614000 ffff880079617d40 ffff88007a39bc20 ffffffffa011603e ffff880078988b98 0000000000000000
Call Trace:
[<ffffffff81491256>] dump_stack+0x4d/0x66
[<ffffffff8105f103>] __might_sleep+0x100/0x105
[<ffffffffa011603e>] transfer_commit_list+0x94/0xf1 [nfs_layout_nfsv41_files]
[<ffffffffa01160d6>] filelayout_recover_commit_reqs+0x3b/0x68 [nfs_layout_nfsv41_files]
[<ffffffffa00ba53a>] nfs_direct_write_reschedule+0x9f/0x1d6 [nfs]
[<ffffffff810705df>] ? mark_lock+0x1df/0x224
[<ffffffff8106e617>] ? trace_hardirqs_off_caller+0x37/0xa4
[<ffffffff8106e691>] ? trace_hardirqs_off+0xd/0xf
[<ffffffffa00ba8f8>] nfs_direct_write_schedule_work+0x9d/0xb7 [nfs]
[<ffffffff810501d7>] ? process_one_work+0x175/0x3a5
[<ffffffff81050258>] process_one_work+0x1f6/0x3a5
[<ffffffff810501d7>] ? process_one_work+0x175/0x3a5
[<ffffffff8105187e>] worker_thread+0x149/0x1f5
[<ffffffff81051735>] ? rescuer_thread+0x28d/0x28d
[<ffffffff81056d74>] kthread+0xd2/0xda
[<ffffffff81056ca2>] ? __kthread_parkme+0x61/0x61
[<ffffffff8149e66c>] ret_from_fork+0x7c/0xb0
[<ffffffff81056ca2>] ? __kthread_parkme+0x61/0x61
Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
2014-01-22 00:21:33 +04:00
spin_unlock ( cinfo - > lock ) ;
2012-04-20 22:47:57 +04:00
}
2014-07-18 04:42:17 +04:00
/* filelayout_search_commit_reqs - Search lists in @cinfo for the head reqest
* for @ page
* @ cinfo - commit info for current inode
* @ page - page to search for matching head request
*
* Returns a the head request if one is found , otherwise returns NULL .
*/
static struct nfs_page *
filelayout_search_commit_reqs ( struct nfs_commit_info * cinfo , struct page * page )
{
struct nfs_page * freq , * t ;
struct pnfs_commit_bucket * b ;
int i ;
/* Linearly search the commit lists for each bucket until a matching
* request is found */
for ( i = 0 , b = cinfo - > ds - > buckets ; i < cinfo - > ds - > nbuckets ; i + + , b + + ) {
list_for_each_entry_safe ( freq , t , & b - > written , wb_list ) {
if ( freq - > wb_page = = page )
return freq - > wb_head ;
}
list_for_each_entry_safe ( freq , t , & b - > committing , wb_list ) {
if ( freq - > wb_page = = page )
return freq - > wb_head ;
}
}
return NULL ;
}
2014-07-03 09:07:46 +04:00
static void filelayout_retry_commit ( struct nfs_commit_info * cinfo , int idx )
{
struct pnfs_ds_commit_info * fl_cinfo = cinfo - > ds ;
2014-09-10 01:51:47 +04:00
struct pnfs_commit_bucket * bucket ;
2014-07-03 09:07:46 +04:00
struct pnfs_layout_segment * freeme ;
int i ;
2014-09-10 01:51:47 +04:00
for ( i = idx ; i < fl_cinfo - > nbuckets ; i + + ) {
bucket = & fl_cinfo - > buckets [ i ] ;
2014-07-03 09:07:46 +04:00
if ( list_empty ( & bucket - > committing ) )
continue ;
nfs_retry_commit ( & bucket - > committing , bucket - > clseg , cinfo ) ;
spin_lock ( cinfo - > lock ) ;
freeme = bucket - > clseg ;
bucket - > clseg = NULL ;
spin_unlock ( cinfo - > lock ) ;
pnfs_put_lseg ( freeme ) ;
}
}
2012-03-16 21:52:45 +04:00
static unsigned int
2012-04-20 22:47:53 +04:00
alloc_ds_commits ( struct nfs_commit_info * cinfo , struct list_head * list )
2011-03-23 16:27:53 +03:00
{
2012-04-20 22:47:53 +04:00
struct pnfs_ds_commit_info * fl_cinfo ;
struct pnfs_commit_bucket * bucket ;
2012-04-20 22:47:39 +04:00
struct nfs_commit_data * data ;
2014-07-03 09:07:46 +04:00
int i ;
2012-03-16 21:52:45 +04:00
unsigned int nreq = 0 ;
2011-03-23 16:27:53 +03:00
2012-04-20 22:47:53 +04:00
fl_cinfo = cinfo - > ds ;
2012-04-20 22:47:38 +04:00
bucket = fl_cinfo - > buckets ;
for ( i = 0 ; i < fl_cinfo - > nbuckets ; i + + , bucket + + ) {
if ( list_empty ( & bucket - > committing ) )
2011-03-23 16:27:53 +03:00
continue ;
data = nfs_commitdata_alloc ( ) ;
if ( ! data )
2012-03-16 21:52:45 +04:00
break ;
2011-03-23 16:27:53 +03:00
data - > ds_commit_index = i ;
2014-05-15 19:56:40 +04:00
spin_lock ( cinfo - > lock ) ;
2012-04-20 22:47:38 +04:00
data - > lseg = bucket - > clseg ;
bucket - > clseg = NULL ;
2014-05-15 19:56:40 +04:00
spin_unlock ( cinfo - > lock ) ;
2011-03-23 16:27:53 +03:00
list_add ( & data - > pages , list ) ;
2012-03-16 21:52:45 +04:00
nreq + + ;
2011-03-23 16:27:53 +03:00
}
2012-03-16 21:52:45 +04:00
/* Clean up on error */
2014-07-03 09:07:46 +04:00
filelayout_retry_commit ( cinfo , i ) ;
2011-03-23 16:27:53 +03:00
/* Caller will clean up entries put on list */
2012-03-16 21:52:45 +04:00
return nreq ;
2011-03-23 16:27:53 +03:00
}
/* This follows nfs_commit_list pretty closely */
static int
filelayout_commit_pagelist ( struct inode * inode , struct list_head * mds_pages ,
2012-04-20 22:47:53 +04:00
int how , struct nfs_commit_info * cinfo )
2011-03-23 16:27:53 +03:00
{
2012-04-20 22:47:39 +04:00
struct nfs_commit_data * data , * tmp ;
2011-03-23 16:27:53 +03:00
LIST_HEAD ( list ) ;
2012-03-16 21:52:45 +04:00
unsigned int nreq = 0 ;
2011-03-23 16:27:53 +03:00
if ( ! list_empty ( mds_pages ) ) {
data = nfs_commitdata_alloc ( ) ;
2012-03-16 21:52:45 +04:00
if ( data ! = NULL ) {
data - > lseg = NULL ;
list_add ( & data - > pages , & list ) ;
nreq + + ;
2014-07-03 09:07:46 +04:00
} else {
2012-04-20 22:47:53 +04:00
nfs_retry_commit ( mds_pages , NULL , cinfo ) ;
2014-07-03 09:07:46 +04:00
filelayout_retry_commit ( cinfo , 0 ) ;
cinfo - > completion_ops - > error_cleanup ( NFS_I ( inode ) ) ;
return - ENOMEM ;
}
2011-03-23 16:27:53 +03:00
}
2012-04-20 22:47:53 +04:00
nreq + = alloc_ds_commits ( cinfo , & list ) ;
2012-03-16 21:52:45 +04:00
if ( nreq = = 0 ) {
2012-04-20 22:47:54 +04:00
cinfo - > completion_ops - > error_cleanup ( NFS_I ( inode ) ) ;
2012-03-16 21:52:45 +04:00
goto out ;
}
2012-04-20 22:47:53 +04:00
atomic_add ( nreq , & cinfo - > mds - > rpcs_out ) ;
2011-03-23 16:27:53 +03:00
list_for_each_entry_safe ( data , tmp , & list , pages ) {
list_del_init ( & data - > pages ) ;
if ( ! data - > lseg ) {
2012-04-20 22:47:54 +04:00
nfs_init_commit ( data , mds_pages , NULL , cinfo ) ;
2012-04-20 22:47:39 +04:00
nfs_initiate_commit ( NFS_CLIENT ( inode ) , data ,
2012-04-28 01:53:44 +04:00
data - > mds_ops , how , 0 ) ;
2011-03-23 16:27:53 +03:00
} else {
2012-04-20 22:47:53 +04:00
struct pnfs_commit_bucket * buckets ;
2012-04-20 22:47:38 +04:00
2012-04-20 22:47:53 +04:00
buckets = cinfo - > ds - > buckets ;
2012-04-20 22:47:54 +04:00
nfs_init_commit ( data , & buckets [ data - > ds_commit_index ] . committing , data - > lseg , cinfo ) ;
2011-03-23 16:27:53 +03:00
filelayout_initiate_commit ( data , how ) ;
}
}
2012-03-16 21:52:45 +04:00
out :
2012-04-20 22:47:53 +04:00
cinfo - > ds - > ncommitting = 0 ;
2012-03-16 21:52:45 +04:00
return PNFS_ATTEMPTED ;
2011-03-23 16:27:53 +03:00
}
2011-05-20 15:47:33 +04:00
/*
 * free_deviceid_node hook: recover the file layout dsaddr that embeds @d
 * as its id_node member and pass it to nfs4_fl_free_deviceid().
 */
static void
filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)
{
	struct nfs4_file_layout_dsaddr *dsaddr;

	dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
	nfs4_fl_free_deviceid(dsaddr);
}
2012-04-20 22:47:38 +04:00
/*
 * alloc_layout_hdr hook: allocate a zeroed nfs4_filelayout and return its
 * embedded generic header, or NULL if the allocation fails.  @inode is not
 * used here.
 */
static struct pnfs_layout_hdr *
filelayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
{
	struct nfs4_filelayout *flo = kzalloc(sizeof(*flo), gfp_flags);

	if (flo == NULL)
		return NULL;

	return &flo->generic_hdr;
}
/*
 * free_layout_hdr hook: free the nfs4_filelayout that FILELAYOUT_FROM_HDR()
 * yields for the generic header @lo.
 */
static void
filelayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
{
	struct nfs4_filelayout *flo = FILELAYOUT_FROM_HDR(lo);

	kfree(flo);
}
2012-04-20 22:47:53 +04:00
static struct pnfs_ds_commit_info *
filelayout_get_ds_info ( struct inode * inode )
{
2012-04-24 22:50:34 +04:00
struct pnfs_layout_hdr * layout = NFS_I ( inode ) - > layout ;
if ( layout = = NULL )
return NULL ;
else
return & FILELAYOUT_FROM_HDR ( layout ) - > commit_info ;
2012-04-20 22:47:53 +04:00
}
2010-10-20 08:18:00 +04:00
static struct pnfs_layoutdriver_type filelayout_type = {
2011-03-01 04:34:21 +03:00
. id = LAYOUT_NFSV4_1_FILES ,
. name = " LAYOUT_NFSV4_1_FILES " ,
. owner = THIS_MODULE ,
2012-04-20 22:47:38 +04:00
. alloc_layout_hdr = filelayout_alloc_layout_hdr ,
. free_layout_hdr = filelayout_free_layout_hdr ,
2011-03-01 04:34:21 +03:00
. alloc_lseg = filelayout_alloc_lseg ,
. free_lseg = filelayout_free_lseg ,
2011-06-10 21:30:23 +04:00
. pg_read_ops = & filelayout_pg_read_ops ,
. pg_write_ops = & filelayout_pg_write_ops ,
2012-04-20 22:47:53 +04:00
. get_ds_info = & filelayout_get_ds_info ,
2012-03-16 01:16:40 +04:00
. mark_request_commit = filelayout_mark_request_commit ,
. clear_request_commit = filelayout_clear_request_commit ,
2012-03-09 02:29:35 +04:00
. scan_commit_lists = filelayout_scan_commit_lists ,
2012-04-20 22:47:57 +04:00
. recover_commit_reqs = filelayout_recover_commit_reqs ,
2014-07-18 04:42:17 +04:00
. search_commit_reqs = filelayout_search_commit_reqs ,
2011-03-23 16:27:53 +03:00
. commit_pagelist = filelayout_commit_pagelist ,
2011-03-01 04:34:19 +03:00
. read_pagelist = filelayout_read_pagelist ,
2011-03-03 18:13:45 +03:00
. write_pagelist = filelayout_write_pagelist ,
2011-05-20 15:47:33 +04:00
. free_deviceid_node = filelayout_free_deveiceid_node ,
2010-10-20 08:18:00 +04:00
} ;
/*
 * Module init: log the registration and register filelayout_type with the
 * pNFS client.  Returns whatever pnfs_register_layoutdriver() returns.
 */
static int __init nfs4filelayout_init(void)
{
	int status;

	printk(KERN_INFO " %s: NFSv4 File Layout Driver Registering... \n ",
	       __func__);

	status = pnfs_register_layoutdriver(&filelayout_type);
	return status;
}
/*
 * Module exit: log the unregistration and unregister filelayout_type from
 * the pNFS client.
 */
static void __exit nfs4filelayout_exit(void)
{
	printk(KERN_INFO " %s: NFSv4 File Layout Driver Unregistering... \n ",
	       __func__);

	pnfs_unregister_layoutdriver(&filelayout_type);
}
2011-07-16 03:18:42 +04:00
/* Declare a module alias; the string is passed to MODULE_ALIAS as written. */
MODULE_ALIAS ( " nfs-layouttype4-1 " ) ;
2010-10-20 08:18:00 +04:00
/* Hook the driver's init and exit functions into module load and unload. */
module_init ( nfs4filelayout_init ) ;
module_exit ( nfs4filelayout_exit ) ;