/*
 *  linux/fs/nfs/blocklayout/blocklayout.c
 *
 *  Module for the NFSv4.1 pNFS block layout driver.
 *
 *  Copyright (c) 2006 The Regents of the University of Michigan.
 *  All rights reserved.
 *
 *  Andy Adamson <andros@citi.umich.edu>
 *  Fred Isaman <iisaman@umich.edu>
 *
 * permission is granted to use, copy, create derivative works and
 * redistribute this software and such derivative works for any purpose,
 * so long as the name of the university of michigan is not used in
 * any advertising or publicity pertaining to the use or distribution
 * of this software without specific, written prior authorization. if
 * the above copyright notice or any other identification of the
 * university of michigan is included in any copy of any portion of
 * this software, then the disclaimer below must also be included.
 *
 * this software is provided as is, without representation from the
 * university of michigan as to its fitness for any purpose, and without
 * warranty by the university of michigan of any kind, either express
 * or implied, including without limitation the implied warranties of
 * merchantability and fitness for a particular purpose. the regents
 * of the university of michigan shall not be liable for any damages,
 * including special, indirect, incidental, or consequential damages,
 * with respect to any claim arising out or in connection with the use
 * of the software, even if it has been or is hereafter advised of the
 * possibility of such damages.
 */
2011-07-30 20:52:53 -04:00
2011-07-30 20:52:39 -04:00
# include <linux/module.h>
# include <linux/init.h>
2011-07-30 20:52:42 -04:00
# include <linux/mount.h>
# include <linux/namei.h>
2011-07-30 20:52:53 -04:00
# include <linux/bio.h> /* struct bio */
2011-08-02 09:57:35 +02:00
# include <linux/prefetch.h>
2012-09-25 14:55:57 +08:00
# include <linux/pagevec.h>
2011-07-30 20:52:39 -04:00
2012-04-09 22:33:39 -04:00
# include "../pnfs.h"
2012-11-26 14:20:49 -05:00
# include "../nfs4session.h"
2012-04-09 22:33:39 -04:00
# include "../internal.h"
2011-07-30 20:52:39 -04:00
# include "blocklayout.h"
#define NFSDBG_FACILITY	NFSDBG_PNFS_LD

/* License/author strings must be exact: extra whitespace inside the
 * MODULE_LICENSE() string would make the module appear non-GPL. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>");
MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver");
2014-09-10 08:23:34 -07:00
static bool is_hole ( struct pnfs_block_extent * be )
2011-07-30 20:52:53 -04:00
{
2014-09-10 08:23:34 -07:00
switch ( be - > be_state ) {
case PNFS_BLOCK_NONE_DATA :
return true ;
case PNFS_BLOCK_INVALID_DATA :
return be - > be_tag ? false : true ;
default :
return false ;
}
2011-07-30 20:52:54 -04:00
}
2011-07-30 20:52:53 -04:00
/* The data we are handed might be spread across several bios. We need
 * to track when the last one is finished.
 */
struct parallel_io {
	struct kref refcnt;			/* one ref per in-flight bio */
	void (*pnfs_callback)(void *data);	/* run once, when refcnt drops to 0 */
	void *data;				/* the nfs_pgio_header for this I/O */
};
static inline struct parallel_io * alloc_parallel ( void * data )
{
struct parallel_io * rv ;
rv = kmalloc ( sizeof ( * rv ) , GFP_NOFS ) ;
if ( rv ) {
rv - > data = data ;
kref_init ( & rv - > refcnt ) ;
}
return rv ;
}
/* Take an extra reference for a bio about to be submitted. */
static inline void get_parallel(struct parallel_io *p)
{
	kref_get(&p->refcnt);
}
/* kref release function: all bios have completed, so run the per-request
 * completion callback and free the tracker.
 */
static void destroy_parallel(struct kref *kref)
{
	struct parallel_io *p = container_of(kref, struct parallel_io, refcnt);

	dprintk("%s enter\n", __func__);
	p->pnfs_callback(p->data);
	kfree(p);
}
/* Drop a reference; the last put runs destroy_parallel(). */
static inline void put_parallel(struct parallel_io *p)
{
	kref_put(&p->refcnt, destroy_parallel);
}
/* Submit @bio (if any) after taking a parallel_io reference that the bio's
 * end_io handler will drop.  Always returns NULL so callers can reset
 * their local pointer with "bio = bl_submit_bio(rw, bio);".
 */
static struct bio *
bl_submit_bio(int rw, struct bio *bio)
{
	if (bio) {
		get_parallel(bio->bi_private);
		dprintk("%s submitting %s bio %u@%llu\n", __func__,
			rw == READ ? "read" : "write", bio->bi_iter.bi_size,
			(unsigned long long)bio->bi_iter.bi_sector);
		submit_bio(rw, bio);
	}
	return NULL;
}
2014-09-10 17:37:27 -07:00
/* Allocate a bio for up to @npg pages and point it at @disk_sector on
 * @bdev.  Under memory pressure (PF_MEMALLOC) the page count is halved
 * repeatedly until an allocation succeeds.  Returns NULL on failure.
 */
static struct bio *
bl_alloc_init_bio(int npg, struct block_device *bdev, sector_t disk_sector,
		void (*end_io)(struct bio *, int err), struct parallel_io *par)
{
	struct bio *bio;

	npg = min(npg, BIO_MAX_PAGES);
	bio = bio_alloc(GFP_NOIO, npg);
	while (!bio && (current->flags & PF_MEMALLOC)) {
		npg /= 2;
		if (!npg)
			break;
		bio = bio_alloc(GFP_NOIO, npg);
	}

	if (!bio)
		return NULL;

	bio->bi_iter.bi_sector = disk_sector;
	bio->bi_bdev = bdev;
	bio->bi_end_io = end_io;
	bio->bi_private = par;
	return bio;
}
2014-09-10 17:37:27 -07:00
/* Append one page (or, for direct I/O, a fragment of it) to @bio.
 * @isect is the file offset in 512-byte sectors; it is translated through
 * the extent @be to a physical disk address.  If the address falls outside
 * the current device mapping @map, the device is remapped and any open bio
 * is flushed first.  *len is clamped to what the mapping allows.
 * Returns the bio to keep appending to, or an ERR_PTR on failure.
 */
static struct bio *
do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect,
		struct page *page, struct pnfs_block_dev_map *map,
		struct pnfs_block_extent *be,
		void (*end_io)(struct bio *, int err),
		struct parallel_io *par, unsigned int offset, int *len)
{
	struct pnfs_block_dev *dev =
		container_of(be->be_device, struct pnfs_block_dev, node);
	u64 disk_addr, end;

	dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__,
		npg, rw, (unsigned long long)isect, offset, *len);

	/* translate to device offset */
	isect += be->be_v_offset;
	isect -= be->be_f_offset;

	/* translate to physical disk offset */
	disk_addr = (u64)isect << SECTOR_SHIFT;
	if (disk_addr < map->start || disk_addr >= map->start + map->len) {
		/* address is outside the cached mapping: remap the device
		 * and submit whatever we have queued so far */
		if (!dev->map(dev, disk_addr, map))
			return ERR_PTR(-EIO);
		bio = bl_submit_bio(rw, bio);
	}
	disk_addr += map->disk_offset;
	disk_addr -= map->start;

	/* limit length to what the device mapping allows */
	end = disk_addr + *len;
	if (end >= map->start + map->len)
		*len = map->start + map->len - disk_addr;

retry:
	if (!bio) {
		bio = bl_alloc_init_bio(npg, map->bdev,
				disk_addr >> SECTOR_SHIFT, end_io, par);
		if (!bio)
			return ERR_PTR(-ENOMEM);
	}
	if (bio_add_page(bio, page, *len, offset) < *len) {
		/* bio is full: submit it and retry with a fresh one */
		bio = bl_submit_bio(rw, bio);
		goto retry;
	}
	return bio;
}
/* bio completion for reads: record the first error seen in the pgio
 * header, fail the layout segment, and drop our parallel_io reference.
 */
static void bl_end_io_read(struct bio *bio, int err)
{
	struct parallel_io *par = bio->bi_private;

	if (err) {
		struct nfs_pgio_header *header = par->data;

		/* keep the first error; later bios must not overwrite it */
		if (!header->pnfs_error)
			header->pnfs_error = -EIO;
		pnfs_set_lo_fail(header->lseg);
	}

	bio_put(bio);
	put_parallel(par);
}
/* Workqueue callback: complete a pNFS read outside bio-completion context. */
static void bl_read_cleanup(struct work_struct *work)
{
	struct rpc_task *task = container_of(work, struct rpc_task, u.tk_work);
	struct nfs_pgio_header *hdr =
		container_of(task, struct nfs_pgio_header, task);

	dprintk("%s enter\n", __func__);
	pnfs_ld_read_done(hdr);
}
/* Last bio of a read finished: defer completion to a workqueue, since
 * pnfs_ld_read_done must not run in bio completion (softirq) context.
 */
static void
bl_end_par_io_read(void *data)
{
	struct nfs_pgio_header *hdr = data;

	hdr->task.tk_status = hdr->pnfs_error;
	INIT_WORK(&hdr->task.u.tk_work, bl_read_cleanup);
	schedule_work(&hdr->task.u.tk_work);
}
2011-07-30 20:52:39 -04:00
/* Issue the read described by @header against the block device(s).
 * The request is split into bios along extent and device-mapping
 * boundaries; holes are satisfied by zeroing pages without touching the
 * device.  Completion is signalled via parallel_io -> bl_end_par_io_read
 * once the last bio finishes.
 */
static enum pnfs_try_status
bl_read_pagelist(struct nfs_pgio_header *header)
{
	struct pnfs_block_layout *bl = BLK_LSEG2EXT(header->lseg);
	struct pnfs_block_dev_map map = { .start = NFS4_MAX_UINT64 };
	struct bio *bio = NULL;
	struct pnfs_block_extent be;
	sector_t isect, extent_length = 0;
	struct parallel_io *par;
	loff_t f_offset = header->args.offset;
	size_t bytes_left = header->args.count;
	unsigned int pg_offset, pg_len;
	struct page **pages = header->args.pages;
	int pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT;
	const bool is_dio = (header->dreq != NULL);
	struct blk_plug plug;
	int i;

	dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
		header->page_array.npages, f_offset,
		(unsigned int)header->args.count);

	par = alloc_parallel(header);
	if (!par)
		return PNFS_NOT_ATTEMPTED;
	par->pnfs_callback = bl_end_par_io_read;

	blk_start_plug(&plug);

	isect = (sector_t) (f_offset >> SECTOR_SHIFT);
	/* Code assumes extents are page-aligned */
	for (i = pg_index; i < header->page_array.npages; i++) {
		if (extent_length <= 0) {
			/* We've used up the previous extent */
			bio = bl_submit_bio(READ, bio);

			/* Get the next one */
			if (!ext_tree_lookup(bl, isect, &be, false)) {
				header->pnfs_error = -EIO;
				goto out;
			}
			extent_length = be.be_length - (isect - be.be_f_offset);
		}

		/* Direct I/O may start/end mid-page; buffered reads are
		 * always whole pages (pg_offset == 0). */
		pg_offset = f_offset & ~PAGE_CACHE_MASK;
		if (is_dio) {
			if (pg_offset + bytes_left > PAGE_CACHE_SIZE)
				pg_len = PAGE_CACHE_SIZE - pg_offset;
			else
				pg_len = bytes_left;
		} else {
			BUG_ON(pg_offset != 0);
			pg_len = PAGE_CACHE_SIZE;
		}

		isect += (pg_offset >> SECTOR_SHIFT);
		extent_length -= (pg_offset >> SECTOR_SHIFT);

		if (is_hole(&be)) {
			bio = bl_submit_bio(READ, bio);
			/* Fill hole w/ zeroes w/o accessing device */
			dprintk("%s Zeroing page for hole\n", __func__);
			zero_user_segment(pages[i], pg_offset, pg_len);

			/* invalidate map */
			map.start = NFS4_MAX_UINT64;
		} else {
			bio = do_add_page_to_bio(bio,
						 header->page_array.npages - i,
						 READ,
						 isect, pages[i], &map, &be,
						 bl_end_io_read, par,
						 pg_offset, &pg_len);
			if (IS_ERR(bio)) {
				header->pnfs_error = PTR_ERR(bio);
				bio = NULL;
				goto out;
			}
		}
		isect += (pg_len >> SECTOR_SHIFT);
		extent_length -= (pg_len >> SECTOR_SHIFT);
		f_offset += pg_len;
		bytes_left -= pg_len;
	}

	/* report EOF and the actual byte count read */
	if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
		header->res.eof = 1;
		header->res.count = header->inode->i_size - header->args.offset;
	} else {
		header->res.count = (isect << SECTOR_SHIFT) - header->args.offset;
	}
out:
	bl_submit_bio(READ, bio);
	blk_finish_plug(&plug);
	put_parallel(par);
	return PNFS_ATTEMPTED;
}
2011-07-30 20:52:54 -04:00
/* bio completion for writes: record the first error seen in the pgio
 * header, fail the layout segment, and drop our parallel_io reference.
 */
static void bl_end_io_write(struct bio *bio, int err)
{
	struct parallel_io *par = bio->bi_private;
	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct nfs_pgio_header *header = par->data;

	if (!uptodate) {
		/* keep the first error; later bios must not overwrite it */
		if (!header->pnfs_error)
			header->pnfs_error = -EIO;
		pnfs_set_lo_fail(header->lseg);
	}
	bio_put(bio);
	put_parallel(par);
}
/* Function scheduled for call during bl_end_par_io_write,
 * it marks sectors as written and extends the commitlist.
 */
static void bl_write_cleanup(struct work_struct *work)
{
	struct rpc_task *task = container_of(work, struct rpc_task, u.tk_work);
	struct nfs_pgio_header *hdr =
		container_of(task, struct nfs_pgio_header, task);

	dprintk("%s enter\n", __func__);

	if (likely(!hdr->pnfs_error)) {
		struct pnfs_block_layout *bl = BLK_LSEG2EXT(hdr->lseg);
		/* round the written range out to whole pages before marking
		 * it written in the extent tree */
		u64 start = hdr->args.offset & (loff_t)PAGE_CACHE_MASK;
		u64 end = (hdr->args.offset + hdr->args.count +
			PAGE_CACHE_SIZE - 1) & (loff_t)PAGE_CACHE_MASK;

		ext_tree_mark_written(bl, start >> SECTOR_SHIFT,
					(end - start) >> SECTOR_SHIFT);
	}

	pnfs_ld_write_done(hdr);
}
/* Called when last of bios associated with a bl_write_pagelist call finishes */
static void bl_end_par_io_write(void *data)
{
	struct nfs_pgio_header *hdr = data;

	hdr->task.tk_status = hdr->pnfs_error;
	/* data went straight to the block device, so it is already stable */
	hdr->verf.committed = NFS_FILE_SYNC;
	INIT_WORK(&hdr->task.u.tk_work, bl_write_cleanup);
	schedule_work(&hdr->task.u.tk_work);
}
2011-07-30 20:52:39 -04:00
static enum pnfs_try_status
2014-06-09 11:48:35 -04:00
bl_write_pagelist ( struct nfs_pgio_header * header , int sync )
2011-07-30 20:52:39 -04:00
{
2014-09-10 08:23:34 -07:00
struct pnfs_block_layout * bl = BLK_LSEG2EXT ( header - > lseg ) ;
2014-09-10 17:37:27 -07:00
struct pnfs_block_dev_map map = { . start = NFS4_MAX_UINT64 } ;
2011-07-30 20:52:54 -04:00
struct bio * bio = NULL ;
2014-09-10 08:23:34 -07:00
struct pnfs_block_extent be ;
2014-09-10 08:23:32 -07:00
sector_t isect , extent_length = 0 ;
2012-08-24 00:27:53 +08:00
struct parallel_io * par = NULL ;
2014-06-09 11:48:35 -04:00
loff_t offset = header - > args . offset ;
size_t count = header - > args . count ;
struct page * * pages = header - > args . pages ;
2014-09-10 08:23:32 -07:00
int pg_index = pg_index = header - > args . pgbase > > PAGE_CACHE_SHIFT ;
2014-09-10 17:37:27 -07:00
unsigned int pg_len ;
2014-08-21 11:09:28 -05:00
struct blk_plug plug ;
2014-09-10 08:23:34 -07:00
int i ;
2011-07-30 20:52:54 -04:00
dprintk ( " %s enter, %Zu@%lld \n " , __func__ , count , offset ) ;
2012-08-24 00:27:53 +08:00
2014-06-09 11:48:35 -04:00
/* At this point, header->page_aray is a (sequential) list of nfs_pages.
2011-07-30 20:52:56 -04:00
* We want to write each , and if there is an error set pnfs_error
* to have it redone using nfs .
2011-07-30 20:52:54 -04:00
*/
2014-06-09 11:48:35 -04:00
par = alloc_parallel ( header ) ;
2011-07-30 20:52:54 -04:00
if ( ! par )
2014-09-10 08:23:34 -07:00
return PNFS_NOT_ATTEMPTED ;
2011-07-30 20:52:54 -04:00
par - > pnfs_callback = bl_end_par_io_write ;
2014-09-10 08:23:32 -07:00
blk_start_plug ( & plug ) ;
2011-07-30 20:52:56 -04:00
2014-09-10 08:23:32 -07:00
/* we always write out the whole page */
offset = offset & ( loff_t ) PAGE_CACHE_MASK ;
isect = offset > > SECTOR_SHIFT ;
2011-07-30 20:52:56 -04:00
2014-06-09 11:48:35 -04:00
for ( i = pg_index ; i < header - > page_array . npages ; i + + ) {
2014-08-21 11:09:29 -05:00
if ( extent_length < = 0 ) {
2011-07-30 20:52:54 -04:00
/* We've used up the previous extent */
bio = bl_submit_bio ( WRITE , bio ) ;
/* Get the next one */
2014-09-10 08:23:34 -07:00
if ( ! ext_tree_lookup ( bl , isect , & be , true ) ) {
2012-04-20 14:47:44 -04:00
header - > pnfs_error = - EINVAL ;
2011-07-30 20:52:54 -04:00
goto out ;
}
2012-08-24 00:27:51 +08:00
2014-09-10 08:23:34 -07:00
extent_length = be . be_length - ( isect - be . be_f_offset ) ;
2011-07-30 20:52:56 -04:00
}
2012-08-24 00:27:51 +08:00
2014-09-10 17:37:27 -07:00
pg_len = PAGE_CACHE_SIZE ;
2014-06-09 11:48:35 -04:00
bio = do_add_page_to_bio ( bio , header - > page_array . npages - i ,
2014-09-10 17:37:27 -07:00
WRITE , isect , pages [ i ] , & map , & be ,
2012-08-24 00:27:51 +08:00
bl_end_io_write , par ,
2014-09-10 17:37:27 -07:00
0 , & pg_len ) ;
2011-07-30 20:52:56 -04:00
if ( IS_ERR ( bio ) ) {
2012-04-20 14:47:44 -04:00
header - > pnfs_error = PTR_ERR ( bio ) ;
2011-09-22 21:50:16 -04:00
bio = NULL ;
2011-07-30 20:52:56 -04:00
goto out ;
2011-07-30 20:52:54 -04:00
}
2014-09-10 17:37:27 -07:00
offset + = pg_len ;
count - = pg_len ;
isect + = ( pg_len > > SECTOR_SHIFT ) ;
extent_length - = ( pg_len > > SECTOR_SHIFT ) ;
2011-07-30 20:52:54 -04:00
}
2011-07-30 20:52:56 -04:00
2014-06-09 11:48:35 -04:00
header - > res . count = header - > args . count ;
2011-07-30 20:52:54 -04:00
out :
bl_submit_bio ( WRITE , bio ) ;
2014-08-21 11:09:28 -05:00
blk_finish_plug ( & plug ) ;
2011-07-30 20:52:54 -04:00
put_parallel ( par ) ;
return PNFS_ATTEMPTED ;
2011-07-30 20:52:39 -04:00
}
static void bl_free_layout_hdr ( struct pnfs_layout_hdr * lo )
{
struct pnfs_block_layout * bl = BLK_LO2EXT ( lo ) ;
2014-09-10 08:23:34 -07:00
int err ;
2011-07-30 20:52:39 -04:00
dprintk ( " %s enter \n " , __func__ ) ;
2014-09-10 08:23:34 -07:00
err = ext_tree_remove ( bl , true , 0 , LLONG_MAX ) ;
WARN_ON ( err ) ;
2011-07-30 20:52:39 -04:00
kfree ( bl ) ;
}
/* Allocate a block layout header with empty RW/RO extent trees.
 * Returns NULL on allocation failure.
 */
static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode,
						   gfp_t gfp_flags)
{
	struct pnfs_block_layout *blk_layout;

	dprintk("%s enter\n", __func__);

	blk_layout = kzalloc(sizeof(*blk_layout), gfp_flags);
	if (blk_layout == NULL)
		return NULL;

	blk_layout->bl_ext_rw = RB_ROOT;
	blk_layout->bl_ext_ro = RB_ROOT;
	spin_lock_init(&blk_layout->bl_ext_lock);

	return &blk_layout->bl_layout;
}
2011-07-30 20:52:44 -04:00
/* Free a layout segment.  The extents decoded for it were inserted into
 * the layout header's extent trees, so only the lseg itself is freed here.
 */
static void bl_free_lseg(struct pnfs_layout_segment *lseg)
{
	dprintk("%s enter\n", __func__);
	kfree(lseg);
}
2014-09-10 17:37:24 -07:00
/* Tracks info needed to ensure extents in layout obey constraints of spec */
struct layout_verification {
	u32 mode;	/* R or RW */
	u64 start;	/* Expected start of next non-COW extent */
	u64 inval;	/* Start of INVAL coverage */
	u64 cowread;	/* End of COW read coverage */
};
/* Verify the extent meets the layout requirements of the pnfs-block draft,
 * section 2.3.1, updating @lv's running coverage state.  Returns 0 if the
 * extent is acceptable, -EIO otherwise.
 */
static int verify_extent(struct pnfs_block_extent *be,
			 struct layout_verification *lv)
{
	if (lv->mode == IOMODE_READ) {
		/* a read-only layout may not contain writable extents, and
		 * extents must tile the range back-to-back */
		if (be->be_state == PNFS_BLOCK_READWRITE_DATA ||
		    be->be_state == PNFS_BLOCK_INVALID_DATA)
			return -EIO;
		if (be->be_f_offset != lv->start)
			return -EIO;
		lv->start += be->be_length;
		return 0;
	}

	/* lv->mode == IOMODE_RW */
	if (be->be_state == PNFS_BLOCK_READWRITE_DATA) {
		if (be->be_f_offset != lv->start)
			return -EIO;
		if (lv->cowread > lv->start)
			return -EIO;
		lv->start += be->be_length;
		lv->inval = lv->start;
		return 0;
	} else if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
		if (be->be_f_offset != lv->start)
			return -EIO;
		lv->start += be->be_length;
		return 0;
	} else if (be->be_state == PNFS_BLOCK_READ_DATA) {
		/* READ_DATA supplies the copy-on-write source; it may only
		 * overlap INVALID_DATA coverage already seen */
		if (be->be_f_offset > lv->start)
			return -EIO;
		if (be->be_f_offset < lv->inval)
			return -EIO;
		if (be->be_f_offset < lv->cowread)
			return -EIO;
		/* It looks like you might want to min this with lv->start,
		 * but you really don't.
		 */
		lv->inval = lv->inval + be->be_length;
		lv->cowread = be->be_f_offset + be->be_length;
		return 0;
	} else
		return -EIO;
}
/* Decode a 64-bit byte value from the XDR stream and store it in @sp as a
 * count of 512-byte sectors.  Returns -1 if the value is not sector-aligned.
 */
static int decode_sector_number(__be32 **rp, sector_t *sp)
{
	uint64_t val;

	*rp = xdr_decode_hyper(*rp, &val);
	if ((val & 0x1ff) != 0) {
		printk(KERN_WARNING "NFS: %s: sector not aligned\n", __func__);
		return -1;
	}
	*sp = val >> SECTOR_SHIFT;
	return 0;
}
/* Decode one extent from the layout XDR stream, resolve its device id,
 * verify it against @lv, and append it to @extents.  On success the extent
 * holds a device reference the caller must eventually drop.
 * Returns 0, -EIO on decode/verify failure, or -ENOMEM.
 */
static int
bl_alloc_extent(struct xdr_stream *xdr, struct pnfs_layout_hdr *lo,
		struct layout_verification *lv, struct list_head *extents,
		gfp_t gfp_mask)
{
	struct pnfs_block_extent *be;
	struct nfs4_deviceid id;
	int error;
	__be32 *p;

	/* deviceid + three 64-bit sector values + 32-bit state = 28 bytes
	 * past the deviceid */
	p = xdr_inline_decode(xdr, 28 + NFS4_DEVICEID4_SIZE);
	if (!p)
		return -EIO;

	be = kzalloc(sizeof(*be), GFP_NOFS);
	if (!be)
		return -ENOMEM;

	memcpy(&id, p, NFS4_DEVICEID4_SIZE);
	p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);

	error = -EIO;
	be->be_device = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), &id,
						lo->plh_lc_cred, gfp_mask);
	if (!be->be_device)
		goto out_free_be;

	/*
	 * The next three values are read in as bytes, but stored in the
	 * extent structure in 512-byte granularity.
	 */
	if (decode_sector_number(&p, &be->be_f_offset) < 0)
		goto out_put_deviceid;
	if (decode_sector_number(&p, &be->be_length) < 0)
		goto out_put_deviceid;
	if (decode_sector_number(&p, &be->be_v_offset) < 0)
		goto out_put_deviceid;
	be->be_state = be32_to_cpup(p++);

	error = verify_extent(be, lv);
	if (error) {
		dprintk("%s: extent verification failed\n", __func__);
		goto out_put_deviceid;
	}

	list_add_tail(&be->be_list, extents);
	return 0;

out_put_deviceid:
	nfs4_put_deviceid_node(be->be_device);
out_free_be:
	kfree(be);
	return error;
}
/* Decode a LAYOUTGET reply into a new layout segment.  Extents are staged
 * on a local list, verified as a whole, and only then inserted into the
 * layout's extent trees.  Returns the lseg or an ERR_PTR.
 */
static struct pnfs_layout_segment *
bl_alloc_lseg(struct pnfs_layout_hdr *lo, struct nfs4_layoutget_res *lgr,
		gfp_t gfp_mask)
{
	struct layout_verification lv = {
		.mode = lgr->range.iomode,
		.start = lgr->range.offset >> SECTOR_SHIFT,
		.inval = lgr->range.offset >> SECTOR_SHIFT,
		.cowread = lgr->range.offset >> SECTOR_SHIFT,
	};
	struct pnfs_block_layout *bl = BLK_LO2EXT(lo);
	struct pnfs_layout_segment *lseg;
	struct xdr_buf buf;
	struct xdr_stream xdr;
	struct page *scratch;
	int status, i;
	uint32_t count;
	__be32 *p;
	LIST_HEAD(extents);

	dprintk("---> %s\n", __func__);

	lseg = kzalloc(sizeof(*lseg), gfp_mask);
	if (!lseg)
		return ERR_PTR(-ENOMEM);

	status = -ENOMEM;
	scratch = alloc_page(gfp_mask);
	if (!scratch)
		goto out;

	xdr_init_decode_pages(&xdr, &buf,
			lgr->layoutp->pages, lgr->layoutp->len);
	xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE);

	status = -EIO;
	p = xdr_inline_decode(&xdr, 4);
	if (unlikely(!p))
		goto out_free_scratch;

	count = be32_to_cpup(p++);
	dprintk("%s: number of extents %d\n", __func__, count);

	/*
	 * Decode individual extents, putting them in temporary staging area
	 * until whole layout is decoded to make error recovery easier.
	 */
	for (i = 0; i < count; i++) {
		status = bl_alloc_extent(&xdr, lo, &lv, &extents, gfp_mask);
		if (status)
			goto process_extents;
	}

	/* the decoded extents must exactly cover the granted range */
	if (lgr->range.offset + lgr->range.length !=
			lv.start << SECTOR_SHIFT) {
		dprintk("%s Final length mismatch\n", __func__);
		status = -EIO;
		goto process_extents;
	}

	/* every COW read extent must be backed by writable coverage */
	if (lv.start < lv.cowread) {
		dprintk("%s Final uncovered COW extent\n", __func__);
		status = -EIO;
	}

process_extents:
	/* on success move staged extents into the tree; on any failure
	 * (including a failed insert) drop the rest */
	while (!list_empty(&extents)) {
		struct pnfs_block_extent *be =
			list_first_entry(&extents, struct pnfs_block_extent,
					 be_list);
		list_del(&be->be_list);

		if (!status)
			status = ext_tree_insert(bl, be);

		if (status) {
			nfs4_put_deviceid_node(be->be_device);
			kfree(be);
		}
	}

out_free_scratch:
	__free_page(scratch);
out:
	dprintk("%s returns %d\n", __func__, status);
	if (status) {
		kfree(lseg);
		return ERR_PTR(status);
	}
	return lseg;
}
2014-09-10 08:23:35 -07:00
static void
bl_return_range ( struct pnfs_layout_hdr * lo ,
struct pnfs_layout_range * range )
{
struct pnfs_block_layout * bl = BLK_LO2EXT ( lo ) ;
sector_t offset = range - > offset > > SECTOR_SHIFT , end ;
int err ;
if ( range - > offset % 8 ) {
dprintk ( " %s: offset %lld not block size aligned \n " ,
__func__ , range - > offset ) ;
return ;
}
if ( range - > length ! = NFS4_MAX_UINT64 ) {
if ( range - > length % 8 ) {
dprintk ( " %s: length %lld not block size aligned \n " ,
__func__ , range - > length ) ;
return ;
}
end = offset + ( range - > length > > SECTOR_SHIFT ) ;
} else {
end = round_down ( NFS4_MAX_UINT64 , PAGE_SIZE ) ;
}
err = ext_tree_remove ( bl , range - > iomode & IOMODE_RW , offset , end ) ;
}
2014-09-10 17:36:30 -07:00
/* Build the LAYOUTCOMMIT payload from the extent tree's written ranges. */
static int
bl_prepare_layoutcommit(struct nfs4_layoutcommit_args *arg)
{
	return ext_tree_prepare_commit(arg);
}
/* Propagate the LAYOUTCOMMIT result to the extent tree. */
static void
bl_cleanup_layoutcommit(struct nfs4_layoutcommit_data *lcdata)
{
	ext_tree_mark_committed(&lcdata->args, lcdata->res.status);
}
static int
bl_set_layoutdriver ( struct nfs_server * server , const struct nfs_fh * fh )
{
dprintk ( " %s enter \n " , __func__ ) ;
2011-07-30 20:52:46 -04:00
if ( server - > pnfs_blksize = = 0 ) {
dprintk ( " %s Server did not return blksize \n " , __func__ ) ;
return - EINVAL ;
}
2014-08-21 11:09:26 -05:00
if ( server - > pnfs_blksize > PAGE_SIZE ) {
printk ( KERN_ERR " %s: pNFS blksize %d not supported. \n " ,
__func__ , server - > pnfs_blksize ) ;
return - EINVAL ;
}
2014-09-10 17:36:31 -07:00
return 0 ;
2011-07-30 20:52:39 -04:00
}
2012-08-24 00:27:52 +08:00
/* Return true if @req may go through the block layout path given the
 * driver's @alignment requirement (SECTOR_SIZE for reads, PAGE_SIZE for
 * writes).  Only direct I/O requests are actually checked.
 */
static bool
is_aligned_req(struct nfs_pageio_descriptor *pgio,
		struct nfs_page *req, unsigned int alignment)
{
	/*
	 * Always accept buffered writes, higher layers take care of the
	 * right alignment.
	 */
	if (pgio->pg_dreq == NULL)
		return true;

	if (!IS_ALIGNED(req->wb_offset, alignment))
		return false;

	if (IS_ALIGNED(req->wb_bytes, alignment))
		return true;

	if (req_offset(req) + req->wb_bytes == i_size_read(pgio->pg_inode)) {
		/*
		 * If the write goes up to the inode size, just write
		 * the full page.  Data past the inode size is
		 * guaranteed to be zeroed by the higher level client
		 * code, and this behaviour is mandated by RFC 5663
		 * section 2.3.2.
		 */
		return true;
	}

	return false;
}
static void
bl_pg_init_read ( struct nfs_pageio_descriptor * pgio , struct nfs_page * req )
{
2014-09-10 08:23:32 -07:00
if ( ! is_aligned_req ( pgio , req , SECTOR_SIZE ) ) {
2012-08-24 00:27:52 +08:00
nfs_pageio_reset_read_mds ( pgio ) ;
2014-09-10 08:23:32 -07:00
return ;
}
pnfs_generic_pg_init_read ( pgio , req ) ;
2012-08-24 00:27:52 +08:00
}
2014-05-15 11:56:43 -04:00
/*
 * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
 * of bytes (maximum @req->wb_bytes) that can be coalesced.
 */
static size_t
bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
		struct nfs_page *req)
{
	/* unaligned direct I/O cannot use the block path */
	if (!is_aligned_req(pgio, req, SECTOR_SIZE))
		return 0;
	return pnfs_generic_pg_test(pgio, prev, req);
}
2012-09-25 14:55:57 +08:00
/*
 * Return the number of contiguous bytes for a given inode
 * starting at page frame idx.
 */
static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx)
{
	struct address_space *mapping = inode->i_mapping;
	pgoff_t end;

	/* Optimize common case that writes from 0 to end of file */
	end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE);
	if (end != NFS_I(inode)->npages) {
		/* not fully cached: scan for the first missing page */
		rcu_read_lock();
		end = page_cache_next_hole(mapping, idx + 1, ULONG_MAX);
		rcu_read_unlock();
	}

	if (!end)
		return i_size_read(inode) - (idx << PAGE_CACHE_SHIFT);
	else
		return (end - idx) << PAGE_CACHE_SHIFT;
}
2012-10-02 08:29:14 -07:00
/* Route a write through pNFS when page-aligned (writes must cover whole
 * pages), sizing the layout request by contiguous dirty bytes (buffered)
 * or remaining bytes (direct); otherwise fall back to the MDS.
 */
static void
bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
	u64 wb_size;

	if (!is_aligned_req(pgio, req, PAGE_SIZE)) {
		nfs_pageio_reset_write_mds(pgio);
		return;
	}

	if (pgio->pg_dreq == NULL)
		wb_size = pnfs_num_cont_bytes(pgio->pg_inode,
					      req->wb_index);
	else
		wb_size = nfs_dreq_bytes_left(pgio->pg_dreq);

	pnfs_generic_pg_init_write(pgio, req, wb_size);
}
2014-05-15 11:56:43 -04:00
/*
 * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
 * of bytes (maximum @req->wb_bytes) that can be coalesced.
 */
static size_t
bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
		 struct nfs_page *req)
{
	/* unaligned direct I/O cannot use the block path */
	if (!is_aligned_req(pgio, req, PAGE_SIZE))
		return 0;
	return pnfs_generic_pg_test(pgio, prev, req);
}
2011-07-30 20:52:40 -04:00
/* Page-coalescing callbacks for the block-layout read and write paths. */
static const struct nfs_pageio_ops bl_pg_read_ops = {
	.pg_init = bl_pg_init_read,
	.pg_test = bl_pg_test_read,
	.pg_doio = pnfs_generic_pg_readpages,
};

static const struct nfs_pageio_ops bl_pg_write_ops = {
	.pg_init = bl_pg_init_write,
	.pg_test = bl_pg_test_write,
	.pg_doio = pnfs_generic_pg_writepages,
};
2011-07-30 20:52:39 -04:00
/* pNFS layout driver registration record for LAYOUT_BLOCK_VOLUME. */
static struct pnfs_layoutdriver_type blocklayout_type = {
	.id				= LAYOUT_BLOCK_VOLUME,
	.name				= "LAYOUT_BLOCK_VOLUME",
	.owner				= THIS_MODULE,
	.flags				= PNFS_LAYOUTRET_ON_SETATTR |
					  PNFS_READ_WHOLE_PAGE,
	.read_pagelist			= bl_read_pagelist,
	.write_pagelist			= bl_write_pagelist,
	.alloc_layout_hdr		= bl_alloc_layout_hdr,
	.free_layout_hdr		= bl_free_layout_hdr,
	.alloc_lseg			= bl_alloc_lseg,
	.free_lseg			= bl_free_lseg,
	.return_range			= bl_return_range,
	.prepare_layoutcommit		= bl_prepare_layoutcommit,
	.cleanup_layoutcommit		= bl_cleanup_layoutcommit,
	.set_layoutdriver		= bl_set_layoutdriver,
	.alloc_deviceid_node		= bl_alloc_deviceid_node,
	.free_deviceid_node		= bl_free_deviceid_node,
	.pg_read_ops			= &bl_pg_read_ops,
	.pg_write_ops			= &bl_pg_write_ops,
};
/* Module init: register the layout driver, then bring up the pipefs
 * support (bl_init_pipefs); unwind the registration if that fails.
 */
static int __init nfs4blocklayout_init(void)
{
	int ret;

	dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__);

	ret = pnfs_register_layoutdriver(&blocklayout_type);
	if (ret)
		goto out;
	ret = bl_init_pipefs();
	if (ret)
		goto out_unregister;
	return 0;

out_unregister:
	pnfs_unregister_layoutdriver(&blocklayout_type);
out:
	return ret;
}
/* Module exit: tear down in reverse order of nfs4blocklayout_init. */
static void __exit nfs4blocklayout_exit(void)
{
	dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n",
	       __func__);

	bl_cleanup_pipefs();
	pnfs_unregister_layoutdriver(&blocklayout_type);
}
/* Alias must match exactly what the NFS core modprobes for layout type 3
 * (LAYOUT_BLOCK_VOLUME); stray whitespace in the string would break
 * autoloading. */
MODULE_ALIAS("nfs-layouttype4-3");

module_init(nfs4blocklayout_init);
module_exit(nfs4blocklayout_exit);