// SPDX-License-Identifier: GPL-2.0-only
/* Object lifetime handling and tracing.
 *
 * Copyright (C) 2022 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/slab.h>
#include "internal.h"

/*
 * Allocate an I/O request and initialise it.
 */
struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
					     struct file *file,
					     loff_t start, size_t len,
					     enum netfs_io_origin origin)
{
	static atomic_t debug_ids;
	struct inode *inode = file ? file_inode(file) : mapping->host;
	struct netfs_inode *ctx = netfs_inode(inode);
	struct netfs_io_request *rreq;
	bool is_unbuffered = (origin == NETFS_UNBUFFERED_WRITE ||
			      origin == NETFS_DIO_READ ||
			      origin == NETFS_DIO_WRITE);
	bool cached = !is_unbuffered && netfs_is_cache_enabled(ctx);
	int ret;

	rreq = kzalloc(ctx->ops->io_request_size ?: sizeof(struct netfs_io_request),
		       GFP_KERNEL);
	if (!rreq)
		return ERR_PTR(-ENOMEM);

	rreq->start = start;
	rreq->len = len;
	rreq->upper_len = len;
	rreq->origin = origin;
	rreq->netfs_ops = ctx->ops;
	rreq->mapping = mapping;
	rreq->inode = inode;
	rreq->i_size = i_size_read(inode);
	rreq->debug_id = atomic_inc_return(&debug_ids);
	INIT_LIST_HEAD(&rreq->subrequests);
	INIT_WORK(&rreq->work, NULL);
	refcount_set(&rreq->ref, 1);

	__set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
	if (cached) {
		__set_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags);
		if (test_bit(NETFS_ICTX_USE_PGPRIV2, &ctx->flags))
			/* Filesystem uses deprecated PG_private_2 marking. */
			__set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags);
	}
	if (file && file->f_flags & O_NONBLOCK)
		__set_bit(NETFS_RREQ_NONBLOCK, &rreq->flags);
	if (rreq->netfs_ops->init_request) {
		ret = rreq->netfs_ops->init_request(rreq, file);
		if (ret < 0) {
			kfree(rreq);
			return ERR_PTR(ret);
		}
	}

	trace_netfs_rreq_ref(rreq->debug_id, 1, netfs_rreq_trace_new);
	netfs_proc_add_rreq(rreq);
	netfs_stat(&netfs_n_rh_rreq);
	return rreq;
}
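
/*
 * Get a ref on a request.
 */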
void netfs_get_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace what)
{
	int r;

	__refcount_inc(&rreq->ref, &r);
	trace_netfs_rreq_ref(rreq->debug_id, r + 1, what);
}
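
/*
 * Unhook and put all the subrequests attached to a request.
 */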
void netfs_clear_subrequests(struct netfs_io_request *rreq, bool was_async)
{
	struct netfs_io_subrequest *subreq;

	while (!list_empty(&rreq->subrequests)) {
		subreq = list_first_entry(&rreq->subrequests,
					  struct netfs_io_subrequest, rreq_link);
		list_del(&subreq->rreq_link);
		netfs_put_subrequest(subreq, was_async,
				     netfs_sreq_trace_put_clear);
	}
}
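
/*
 * Free a request once its last ref has been dropped.  This runs directly, or
 * from a work item if the final put was made in an async context.
 */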
static void netfs_free_request(struct work_struct *work)
{
	struct netfs_io_request *rreq =
		container_of(work, struct netfs_io_request, work);
	unsigned int i;

	trace_netfs_rreq(rreq, netfs_rreq_trace_free);
	netfs_proc_del_rreq(rreq);
	netfs_clear_subrequests(rreq, false);
	if (rreq->netfs_ops->free_request)
		rreq->netfs_ops->free_request(rreq);
	if (rreq->cache_resources.ops)
		rreq->cache_resources.ops->end_operation(&rreq->cache_resources);
	if (rreq->direct_bv) {
		for (i = 0; i < rreq->direct_bv_count; i++) {
			if (rreq->direct_bv[i].bv_page) {
				if (rreq->direct_bv_unpin)
					unpin_user_page(rreq->direct_bv[i].bv_page);
			}
		}
		kvfree(rreq->direct_bv);
	}
	kfree_rcu(rreq, rcu);
	netfs_stat_d(&netfs_n_rh_rreq);
}
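
/*
 * Drop a ref on a request.  When the count reaches zero, the request is freed,
 * deferring to a work item if the caller was in an async context.
 */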
void netfs_put_request(struct netfs_io_request *rreq, bool was_async,
		       enum netfs_rreq_ref_trace what)
{
	unsigned int debug_id;
	bool dead;
	int r;

	if (rreq) {
		debug_id = rreq->debug_id;
		dead = __refcount_dec_and_test(&rreq->ref, &r);
		trace_netfs_rreq_ref(debug_id, r - 1, what);
		if (dead) {
			if (was_async) {
				rreq->work.func = netfs_free_request;
				if (!queue_work(system_unbound_wq, &rreq->work))
					BUG();
			} else {
				netfs_free_request(&rreq->work);
			}
		}
	}
}

/*
 * Allocate and partially initialise an I/O subrequest structure.
 */
struct netfs_io_subrequest *netfs_alloc_subrequest(struct netfs_io_request *rreq)
{
	struct netfs_io_subrequest *subreq;

	subreq = kzalloc(rreq->netfs_ops->io_subrequest_size ?:
			 sizeof(struct netfs_io_subrequest),
			 GFP_KERNEL);
	if (subreq) {
		INIT_WORK(&subreq->work, NULL);
		INIT_LIST_HEAD(&subreq->rreq_link);
		refcount_set(&subreq->ref, 2);
		subreq->rreq = rreq;
		subreq->debug_index = atomic_inc_return(&rreq->subreq_counter);
		netfs_get_request(rreq, netfs_rreq_trace_get_subreq);
		netfs_stat(&netfs_n_rh_sreq);
	}

	return subreq;
}
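
/*
 * Get a ref on a subrequest.
 */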
void netfs_get_subrequest(struct netfs_io_subrequest *subreq,
			  enum netfs_sreq_ref_trace what)
{
	int r;

	__refcount_inc(&subreq->ref, &r);
	trace_netfs_sreq_ref(subreq->rreq->debug_id, subreq->debug_index, r + 1,
			     what);
}
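
/*
 * Free a subrequest and drop the ref it holds on its parent request.
 */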
static void netfs_free_subrequest(struct netfs_io_subrequest *subreq,
				  bool was_async)
{
	struct netfs_io_request *rreq = subreq->rreq;

	trace_netfs_sreq(subreq, netfs_sreq_trace_free);
	if (rreq->netfs_ops->free_subrequest)
		rreq->netfs_ops->free_subrequest(subreq);
	kfree(subreq);
	netfs_stat_d(&netfs_n_rh_sreq);
	netfs_put_request(rreq, was_async, netfs_rreq_trace_put_subreq);
}
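
/*
 * Drop a ref on a subrequest, freeing it when the count reaches zero.
 */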
void netfs_put_subrequest(struct netfs_io_subrequest *subreq, bool was_async,
			  enum netfs_sreq_ref_trace what)
{
	unsigned int debug_index = subreq->debug_index;
	unsigned int debug_id = subreq->rreq->debug_id;
	bool dead;
	int r;

	dead = __refcount_dec_and_test(&subreq->ref, &r);
	trace_netfs_sreq_ref(debug_id, debug_index, r - 1, what);
	if (dead)
		netfs_free_subrequest(subreq, was_async);
}