// SPDX-License-Identifier: GPL-2.0-or-later
/* Network filesystem high-level buffered read support.
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"

/*
 * Unlock the folios in a read operation.  We need to set PG_fscache on any
 * folios we're going to write back before we unlock them.
 */
void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
{
        struct netfs_io_subrequest *subreq;
        struct netfs_folio *finfo;
        struct folio *folio;
        pgoff_t start_page = rreq->start / PAGE_SIZE;
        pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
        size_t account = 0;
        bool subreq_failed = false;

        XA_STATE(xas, &rreq->mapping->i_pages, start_page);

        if (test_bit(NETFS_RREQ_FAILED, &rreq->flags)) {
                __clear_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags);
                list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
                        __clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
                }
        }

        /* Walk through the pagecache and the I/O request lists simultaneously.
         * We may have a mixture of cached and uncached sections and we only
         * really want to write out the uncached sections.  This is slightly
         * complicated by the possibility that we might have huge pages with a
         * mixture inside.
         */
        subreq = list_first_entry(&rreq->subrequests,
                                  struct netfs_io_subrequest, rreq_link);
        subreq_failed = (subreq->error < 0);

        trace_netfs_rreq(rreq, netfs_rreq_trace_unlock);

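        /* Worked illustration (hypothetical numbers): for a 16KiB request
         * split into four 4KiB subrequests and a pagecache holding one 16KiB
         * folio, the loop below pairs that single folio with all four
         * subrequests before unlocking it; a failure in any one of them
         * leaves the folio not marked uptodate.
         */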
        rcu_read_lock();
        xas_for_each(&xas, folio, last_page) {
                loff_t pg_end;
                bool pg_failed = false;
                bool folio_started;

                if (xas_retry(&xas, folio))
                        continue;

                pg_end = folio_pos(folio) + folio_size(folio) - 1;

                folio_started = false;
                for (;;) {
                        loff_t sreq_end;

                        if (!subreq) {
                                pg_failed = true;
                                break;
                        }
                        if (!folio_started && test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) {
                                trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
                                folio_start_fscache(folio);
                                folio_started = true;
                        }
                        pg_failed |= subreq_failed;
                        sreq_end = subreq->start + subreq->len - 1;
                        if (pg_end < sreq_end)
                                break;

                        account += subreq->transferred;
                        if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
                                subreq = list_next_entry(subreq, rreq_link);
                                subreq_failed = (subreq->error < 0);
                        } else {
                                subreq = NULL;
                                subreq_failed = false;
                        }

                        if (pg_end == sreq_end)
                                break;
                }

                if (!pg_failed) {
                        flush_dcache_folio(folio);
                        finfo = netfs_folio_info(folio);
                        if (finfo) {
                                trace_netfs_folio(folio, netfs_folio_trace_filled_gaps);
                                if (finfo->netfs_group)
                                        folio_change_private(folio, finfo->netfs_group);
                                else
                                        folio_detach_private(folio);
                                kfree(finfo);
                        }
                        folio_mark_uptodate(folio);
                }

                if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
                        if (folio->index == rreq->no_unlock_folio &&
                            test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
                                _debug("no unlock");
                        else
                                folio_unlock(folio);
                }
        }
        rcu_read_unlock();

        task_io_account_read(account);
        if (rreq->netfs_ops->done)
                rreq->netfs_ops->done(rreq);
}

static void netfs_cache_expand_readahead(struct netfs_io_request *rreq,
                                         loff_t *_start, size_t *_len, loff_t i_size)
{
        struct netfs_cache_resources *cres = &rreq->cache_resources;

        if (cres->ops && cres->ops->expand_readahead)
                cres->ops->expand_readahead(cres, _start, _len, i_size);
}

static void netfs_rreq_expand(struct netfs_io_request *rreq,
                              struct readahead_control *ractl)
{
        /* Give the cache a chance to change the request parameters.  The
         * resultant request must contain the original region.
         */
        netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size);

        /* Give the netfs a chance to change the request parameters.  The
         * resultant request must contain the original region.
         */
        if (rreq->netfs_ops->expand_readahead)
                rreq->netfs_ops->expand_readahead(rreq);

        /* Expand the request if the cache wants it to start earlier.  Note
         * that the expansion may get further extended if the VM wishes to
         * insert THPs and the preferred start and/or end wind up in the middle
         * of THPs.
         *
         * If this is the case, however, the THP size should be an integer
         * multiple of the cache granule size, so we get a whole number of
         * granules to deal with.
         */
        if (rreq->start != readahead_pos(ractl) ||
            rreq->len != readahead_length(ractl)) {
                readahead_expand(ractl, rreq->start, rreq->len);
                rreq->start = readahead_pos(ractl);
                rreq->len = readahead_length(ractl);

                trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
                                 netfs_read_trace_expanded);
        }
}

/*
 * Begin an operation, and fetch the stored zero point value from the cookie if
 * available.
 */
static int netfs_begin_cache_read(struct netfs_io_request *rreq, struct netfs_inode *ctx)
{
        return fscache_begin_read_operation(&rreq->cache_resources, netfs_i_cookie(ctx));
}

/**
 * netfs_readahead - Helper to manage a read request
 * @ractl: The description of the readahead request
 *
 * Fulfil a readahead request by drawing data from the cache if possible, or
 * the netfs if not.  Space beyond the EOF is zero-filled.  Multiple I/O
 * requests from different sources will get munged together.  If necessary, the
 * readahead window can be expanded in either direction to a more convenient
 * alignment for RPC efficiency or to make storage in the cache feasible.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
void netfs_readahead(struct readahead_control *ractl)
{
        struct netfs_io_request *rreq;
        struct netfs_inode *ctx = netfs_inode(ractl->mapping->host);
        int ret;

        _enter("%lx,%x", readahead_index(ractl), readahead_count(ractl));

        if (readahead_count(ractl) == 0)
                return;

        rreq = netfs_alloc_request(ractl->mapping, ractl->file,
                                   readahead_pos(ractl),
                                   readahead_length(ractl),
                                   NETFS_READAHEAD);
        if (IS_ERR(rreq))
                return;

        ret = netfs_begin_cache_read(rreq, ctx);
        if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
                goto cleanup_free;

        netfs_stat(&netfs_n_rh_readahead);
        trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
                         netfs_read_trace_readahead);

        netfs_rreq_expand(rreq, ractl);

        /* Set up the output buffer */
        iov_iter_xarray(&rreq->iter, ITER_DEST, &ractl->mapping->i_pages,
                        rreq->start, rreq->len);

        /* Drop the refs on the folios here rather than in the cache or
         * filesystem.  The locks will be dropped in netfs_rreq_unlock().
         */
        while (readahead_folio(ractl))
                ;

        netfs_begin_read(rreq, false);
        netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
        return;

cleanup_free:
        netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
        return;
}
EXPORT_SYMBOL(netfs_readahead);

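/*
 * Illustrative usage (not part of this file; the "myfs" names are
 * hypothetical): a network filesystem typically embeds a struct netfs_inode
 * in its own inode struct (conventionally as the first member) and points its
 * address_space_operations at these helpers, e.g.:
 *
 *      struct myfs_inode {
 *              struct netfs_inode netfs;       // the VFS inode lives at netfs.inode
 *              ...
 *      };
 *
 *      static const struct address_space_operations myfs_aops = {
 *              .readahead      = netfs_readahead,
 *              .read_folio     = netfs_read_folio,
 *              ...
 *      };
 */
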
/**
 * netfs_read_folio - Helper to manage a read_folio request
 * @file: The file to read from
 * @folio: The folio to read
 *
 * Fulfil a read_folio request by drawing data from the cache if
 * possible, or the netfs if not.  Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
int netfs_read_folio(struct file *file, struct folio *folio)
{
        struct address_space *mapping = folio->mapping;
        struct netfs_io_request *rreq;
        struct netfs_inode *ctx = netfs_inode(mapping->host);
        struct folio *sink = NULL;
        int ret;

        _enter("%lx", folio->index);

        rreq = netfs_alloc_request(mapping, file,
                                   folio_file_pos(folio), folio_size(folio),
                                   NETFS_READPAGE);
        if (IS_ERR(rreq)) {
                ret = PTR_ERR(rreq);
                goto alloc_error;
        }

        ret = netfs_begin_cache_read(rreq, ctx);
        if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
                goto discard;

        netfs_stat(&netfs_n_rh_readpage);
        trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);

        /* Set up the output buffer */
        if (folio_test_dirty(folio)) {
                /* Handle someone trying to read from an unflushed streaming
                 * write.  We fiddle the buffer so that a gap at the beginning
                 * and/or a gap at the end get copied to, but the middle is
                 * discarded.
                 */
                struct netfs_folio *finfo = netfs_folio_info(folio);
                struct bio_vec *bvec;
                unsigned int from = finfo->dirty_offset;
                unsigned int to = from + finfo->dirty_len;
                unsigned int off = 0, i = 0;
                size_t flen = folio_size(folio);
                size_t nr_bvec = flen / PAGE_SIZE + 2;
                size_t part;

                ret = -ENOMEM;
                bvec = kmalloc_array(nr_bvec, sizeof(*bvec), GFP_KERNEL);
                if (!bvec)
                        goto discard;

                sink = folio_alloc(GFP_KERNEL, 0);
                if (!sink)
                        goto discard;

                trace_netfs_folio(folio, netfs_folio_trace_read_gaps);

                rreq->direct_bv = bvec;
                rreq->direct_bv_count = nr_bvec;
                if (from > 0) {
                        bvec_set_folio(&bvec[i++], folio, from, 0);
                        off = from;
                }
                while (off < to) {
                        part = min_t(size_t, to - off, PAGE_SIZE);
                        bvec_set_folio(&bvec[i++], sink, part, 0);
                        off += part;
                }
                if (to < flen)
                        bvec_set_folio(&bvec[i++], folio, flen - to, to);
                iov_iter_bvec(&rreq->iter, ITER_DEST, bvec, i, rreq->len);
        } else {
                iov_iter_xarray(&rreq->iter, ITER_DEST, &mapping->i_pages,
                                rreq->start, rreq->len);
        }

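        /* Illustration of the gap buffer built above (hypothetical numbers):
         * for an 8KiB folio whose streaming write dirtied bytes [1024, 3072),
         * the iterator covers folio[0..1024) for the head gap, then sink
         * space for the 2048 dirty bytes (data read for that span is thrown
         * away), then folio[3072..8192) for the tail gap.
         */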
        ret = netfs_begin_read(rreq, true);
        if (sink)
                folio_put(sink);
        netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
        return ret < 0 ? ret : 0;

discard:
        netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
alloc_error:
        folio_unlock(folio);
        return ret;
}
EXPORT_SYMBOL(netfs_read_folio);

/*
 * Prepare a folio for writing without reading first
 * @folio: The folio being prepared
 * @pos: starting position for the write
 * @len: length of write
 * @always_fill: T if the folio should always be completely filled/cleared
 *
 * In some cases, write_begin doesn't need to read at all:
 * - full folio write
 * - write that lies in a folio that is completely beyond EOF
 * - write that covers the folio from start to EOF or beyond it
 *
 * If any of these criteria are met, then zero out the unwritten parts
 * of the folio and return true.  Otherwise, return false.
 */
static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
                                  bool always_fill)
{
        struct inode *inode = folio_inode(folio);
        loff_t i_size = i_size_read(inode);
        size_t offset = offset_in_folio(folio, pos);
        size_t plen = folio_size(folio);

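        /* Worked examples (figures illustrative), assuming 4KiB folios and
         * i_size = 0x800: a 4KiB write at pos 0 covers its whole folio, so
         * nothing needs zeroing; a small write at pos 0x1100 sits in a folio
         * wholly beyond EOF, so the bytes of that folio before and after the
         * write are zeroed; and a 0x900-byte write at pos 0 runs from the
         * start of its folio to past EOF, so only the tail beyond the write
         * is zeroed.
         */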
        if (unlikely(always_fill)) {
                if (pos - offset + len <= i_size)
                        return false; /* Page entirely before EOF */
                zero_user_segment(&folio->page, 0, plen);
                folio_mark_uptodate(folio);
                return true;
        }

        /* Full folio write */
        if (offset == 0 && len >= plen)
                return true;

        /* Page entirely beyond the end of the file */
        if (pos - offset >= i_size)
                goto zero_out;

        /* Write that covers from the start of the folio to EOF or beyond */
        if (offset == 0 && (pos + len) >= i_size)
                goto zero_out;

        return false;
zero_out:
        zero_user_segments(&folio->page, 0, offset, offset + len, plen);
        return true;
}

/**
 * netfs_write_begin - Helper to prepare for writing
 * @ctx: The netfs context
 * @file: The file to read from
 * @mapping: The mapping to read from
 * @pos: File position at which the write will begin
 * @len: The length of the write (may extend beyond the end of the folio chosen)
 * @_folio: Where to put the resultant folio
 * @_fsdata: Place for the netfs to store a cookie
 *
 * Pre-read data for a write-begin request by drawing data from the cache if
 * possible, or the netfs if not.  Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together.  If
 * necessary, the readahead window can be expanded in either direction to a
 * more convenient alignment for RPC efficiency or to make storage in the cache
 * feasible.
 *
 * The calling netfs must provide a table of operations, only one of which,
 * issue_op, is mandatory.
 *
 * The check_write_begin() operation can be provided to check for and flush
 * conflicting writes once the folio is grabbed and locked.  It is passed a
 * pointer to the fsdata cookie that gets returned to the VM to be passed to
 * write_end.  It is permitted to sleep.  It should return 0 if the request
 * should go ahead or it may return an error.  It may also unlock and put the
 * folio, provided it sets ``*foliop`` to NULL, in which case a return of 0
 * will cause the folio to be re-got and the process to be retried.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
int netfs_write_begin(struct netfs_inode *ctx,
                      struct file *file, struct address_space *mapping,
                      loff_t pos, unsigned int len, struct folio **_folio,
                      void **_fsdata)
{
        struct netfs_io_request *rreq;
        struct folio *folio;
        pgoff_t index = pos >> PAGE_SHIFT;
        int ret;

        DEFINE_READAHEAD(ractl, file, NULL, mapping, index);

retry:
        folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
                                    mapping_gfp_mask(mapping));
        if (IS_ERR(folio))
                return PTR_ERR(folio);

        if (ctx->ops->check_write_begin) {
                /* Allow the netfs (eg. ceph) to flush conflicts. */
                ret = ctx->ops->check_write_begin(file, pos, len, &folio, _fsdata);
                if (ret < 0) {
                        trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
                        goto error;
                }
                if (!folio)
                        goto retry;
        }

        if (folio_test_uptodate(folio))
                goto have_folio;

        /* If the page is beyond the EOF, we want to clear it - unless it's
         * within the cache granule containing the EOF, in which case we need
         * to preload the granule.
         */
        if (!netfs_is_cache_enabled(ctx) &&
            netfs_skip_folio_read(folio, pos, len, false)) {
                netfs_stat(&netfs_n_rh_write_zskip);
                goto have_folio_no_wait;
        }

        rreq = netfs_alloc_request(mapping, file,
                                   folio_file_pos(folio), folio_size(folio),
                                   NETFS_READ_FOR_WRITE);
        if (IS_ERR(rreq)) {
                ret = PTR_ERR(rreq);
                goto error;
        }
        rreq->no_unlock_folio = folio->index;
        __set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);

        ret = netfs_begin_cache_read(rreq, ctx);
        if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
                goto error_put;

        netfs_stat(&netfs_n_rh_write_begin);
        trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);

        /* Expand the request to meet caching requirements and download
         * preferences.
         */
        ractl._nr_pages = folio_nr_pages(folio);
        netfs_rreq_expand(rreq, &ractl);

        /* Set up the output buffer */
        iov_iter_xarray(&rreq->iter, ITER_DEST, &mapping->i_pages,
                        rreq->start, rreq->len);

        /* We hold the folio locks, so we can drop the references */
        folio_get(folio);
        while (readahead_folio(&ractl))
                ;

        ret = netfs_begin_read(rreq, true);
        if (ret < 0)
                goto error;
        netfs_put_request(rreq, false, netfs_rreq_trace_put_return);

have_folio:
        ret = folio_wait_fscache_killable(folio);
        if (ret < 0)
                goto error;
have_folio_no_wait:
        *_folio = folio;
        _leave(" = 0");
        return 0;

error_put:
        netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
error:
        if (folio) {
                folio_unlock(folio);
                folio_put(folio);
        }
        _leave(" = %d", ret);
        return ret;
}
EXPORT_SYMBOL(netfs_write_begin);

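/*
 * Illustrative usage (not part of this file; "myfs" names are hypothetical):
 * a filesystem's ->write_begin() would typically wrap netfs_write_begin()
 * roughly as below, assuming the page-based ->write_begin() prototype of this
 * kernel vintage.
 *
 *      static int myfs_write_begin(struct file *file, struct address_space *mapping,
 *                                  loff_t pos, unsigned len,
 *                                  struct page **pagep, void **fsdata)
 *      {
 *              struct folio *folio;
 *              int ret;
 *
 *              ret = netfs_write_begin(netfs_inode(mapping->host), file, mapping,
 *                                      pos, len, &folio, fsdata);
 *              if (ret == 0)
 *                      *pagep = &folio->page;
 *              return ret;
 *      }
 */
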
/*
 * Preload the data into a page we're proposing to write into.
 */
int netfs_prefetch_for_write(struct file *file, struct folio *folio,
                             size_t offset, size_t len)
{
        struct netfs_io_request *rreq;
        struct address_space *mapping = folio->mapping;
        struct netfs_inode *ctx = netfs_inode(mapping->host);
        unsigned long long start = folio_pos(folio);
        size_t flen = folio_size(folio);
        int ret;

        _enter("%zx @%llx", flen, start);

        ret = -ENOMEM;

        rreq = netfs_alloc_request(mapping, file, start, flen,
                                   NETFS_READ_FOR_WRITE);
        if (IS_ERR(rreq)) {
                ret = PTR_ERR(rreq);
                goto error;
        }

        rreq->no_unlock_folio = folio->index;
        __set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
        ret = netfs_begin_cache_read(rreq, ctx);
        if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
                goto error_put;

        netfs_stat(&netfs_n_rh_write_begin);
        trace_netfs_read(rreq, start, flen, netfs_read_trace_prefetch_for_write);

        /* Set up the output buffer */
        iov_iter_xarray(&rreq->iter, ITER_DEST, &mapping->i_pages,
                        rreq->start, rreq->len);

        ret = netfs_begin_read(rreq, true);
        netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
        return ret;

error_put:
        netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
error:
        _leave(" = %d", ret);
        return ret;
}

/**
 * netfs_buffered_read_iter - Filesystem buffered I/O read routine
 * @iocb: kernel I/O control block
 * @iter: destination for the data read
 *
 * This is the ->read_iter() routine for all filesystems that can use the page
 * cache directly.
 *
 * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
 * returned when no data can be read without waiting for I/O requests to
 * complete; it doesn't prevent readahead.
 *
 * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
 * shall be made for the read or for readahead.  When no data can be read,
 * -EAGAIN shall be returned.  When readahead would be triggered, a partial,
 * possibly empty read shall be returned.
 *
 * Return:
 * * number of bytes copied, even for partial reads
 * * negative error code (or 0 if IOCB_NOIO) if nothing was read
 */
ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
        struct inode *inode = file_inode(iocb->ki_filp);
        struct netfs_inode *ictx = netfs_inode(inode);
        ssize_t ret;

        if (WARN_ON_ONCE((iocb->ki_flags & IOCB_DIRECT) ||
                         test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags)))
                return -EINVAL;

        ret = netfs_start_io_read(inode);
        if (ret == 0) {
                ret = filemap_read(iocb, iter, 0);
                netfs_end_io_read(inode);
        }
        return ret;
}
EXPORT_SYMBOL(netfs_buffered_read_iter);

/**
 * netfs_file_read_iter - Generic filesystem read routine
 * @iocb: kernel I/O control block
 * @iter: destination for the data read
 *
 * This is the ->read_iter() routine for all filesystems that can use the page
 * cache directly.
 *
 * The IOCB_NOWAIT flag in iocb->ki_flags indicates that -EAGAIN shall be
 * returned when no data can be read without waiting for I/O requests to
 * complete; it doesn't prevent readahead.
 *
 * The IOCB_NOIO flag in iocb->ki_flags indicates that no new I/O requests
 * shall be made for the read or for readahead.  When no data can be read,
 * -EAGAIN shall be returned.  When readahead would be triggered, a partial,
 * possibly empty read shall be returned.
 *
 * Return:
 * * number of bytes copied, even for partial reads
 * * negative error code (or 0 if IOCB_NOIO) if nothing was read
 */
ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
        struct netfs_inode *ictx = netfs_inode(iocb->ki_filp->f_mapping->host);

        if ((iocb->ki_flags & IOCB_DIRECT) ||
            test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags))
                return netfs_unbuffered_read_iter(iocb, iter);

        return netfs_buffered_read_iter(iocb, iter);
}
EXPORT_SYMBOL(netfs_file_read_iter);

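/*
 * Illustrative usage (not part of this file; "myfs" names are hypothetical):
 * a network filesystem would normally point its file_operations at the helper
 * above, e.g.:
 *
 *      static const struct file_operations myfs_file_ops = {
 *              .read_iter      = netfs_file_read_iter,
 *              ...
 *      };
 *
 * so that O_DIRECT and unbuffered-mode reads are routed to the direct I/O
 * path while everything else goes through the page cache.
 */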