2017-03-17 09:18:50 +03:00
// SPDX-License-Identifier: GPL-2.0
# ifndef NO_BCACHEFS_FS
# include "bcachefs.h"
2018-10-06 07:46:55 +03:00
# include "alloc_foreground.h"
2020-12-17 23:08:58 +03:00
# include "bkey_buf.h"
2017-03-17 09:18:50 +03:00
# include "btree_update.h"
# include "buckets.h"
# include "clock.h"
# include "error.h"
2018-08-06 00:46:41 +03:00
# include "extents.h"
2019-11-15 23:52:28 +03:00
# include "extent_update.h"
2017-03-17 09:18:50 +03:00
# include "fs.h"
# include "fs-io.h"
# include "fsck.h"
# include "inode.h"
# include "journal.h"
# include "io.h"
# include "keylist.h"
# include "quota.h"
2019-08-16 16:59:56 +03:00
# include "reflink.h"
2017-03-17 09:18:50 +03:00
# include "trace.h"
# include <linux/aio.h>
# include <linux/backing-dev.h>
# include <linux/falloc.h>
# include <linux/migrate.h>
# include <linux/mmu_context.h>
# include <linux/pagevec.h>
2020-10-09 07:09:20 +03:00
# include <linux/rmap.h>
2017-03-17 09:18:50 +03:00
# include <linux/sched/signal.h>
# include <linux/task_io_accounting_ops.h>
# include <linux/uio.h>
# include <linux/writeback.h>
# include <trace/events/writeback.h>
2019-07-29 19:24:36 +03:00
static inline bool bio_full ( struct bio * bio , unsigned len )
{
if ( bio - > bi_vcnt > = bio - > bi_max_vecs )
return true ;
if ( bio - > bi_iter . bi_size > UINT_MAX - len )
return true ;
return false ;
}
2020-11-11 20:33:12 +03:00
/*
 * Return current->faults_disabled_mapping with bit 0 masked off; bit 0 is
 * used as a flag by set_fdm_dropped_locks()/fdm_dropped_locks().
 */
static inline struct address_space *faults_disabled_mapping(void)
{
	unsigned long tagged = (unsigned long) current->faults_disabled_mapping;

	return (void *) (tagged & ~1UL);
}
/* Set bit 0 of current->faults_disabled_mapping (the "locks dropped" flag). */
static inline void set_fdm_dropped_locks(void)
{
	unsigned long tagged = (unsigned long) current->faults_disabled_mapping;

	current->faults_disabled_mapping = (void *) (tagged | 1);
}
/* Test bit 0 of current->faults_disabled_mapping (the "locks dropped" flag). */
static inline bool fdm_dropped_locks(void)
{
	unsigned long tagged = (unsigned long) current->faults_disabled_mapping;

	return tagged & 1;
}
2017-03-17 09:18:50 +03:00
/* Handle for a quota reservation: the number of sectors it currently holds. */
struct quota_res {
	u64				sectors;
};
struct bch_writepage_io {
2019-10-09 19:50:39 +03:00
struct bch_inode_info * inode ;
2017-03-17 09:18:50 +03:00
/* must be last: */
2019-10-09 19:50:39 +03:00
struct bch_write_op op ;
2017-03-17 09:18:50 +03:00
} ;
struct dio_write {
struct kiocb * req ;
2022-11-01 03:30:27 +03:00
struct address_space * mapping ;
struct bch_inode_info * inode ;
2019-01-14 05:36:14 +03:00
struct mm_struct * mm ;
2017-03-17 09:18:50 +03:00
unsigned loop : 1 ,
2022-11-14 06:43:37 +03:00
extending : 1 ,
2017-03-17 09:18:50 +03:00
sync : 1 ,
2022-11-03 07:29:43 +03:00
flush : 1 ,
2017-03-17 09:18:50 +03:00
free_iov : 1 ;
struct quota_res quota_res ;
2020-06-30 01:22:06 +03:00
u64 written ;
2017-03-17 09:18:50 +03:00
struct iov_iter iter ;
struct iovec inline_vecs [ 2 ] ;
/* must be last: */
2019-10-09 19:50:39 +03:00
struct bch_write_op op ;
2017-03-17 09:18:50 +03:00
} ;
struct dio_read {
struct closure cl ;
struct kiocb * req ;
long ret ;
2021-01-21 22:42:23 +03:00
bool should_dirty ;
2017-03-17 09:18:50 +03:00
struct bch_read_bio rbio ;
} ;
/* pagecache_block must be held */
2022-11-02 23:45:28 +03:00
static noinline int write_invalidate_inode_pages_range ( struct address_space * mapping ,
2017-03-17 09:18:50 +03:00
loff_t start , loff_t end )
{
int ret ;
/*
* XXX : the way this is currently implemented , we can spin if a process
* is continually redirtying a specific page
*/
do {
if ( ! mapping - > nrpages )
return 0 ;
ret = filemap_write_and_wait_range ( mapping , start , end ) ;
if ( ret )
break ;
if ( ! mapping - > nrpages )
return 0 ;
ret = invalidate_inode_pages2_range ( mapping ,
start > > PAGE_SHIFT ,
end > > PAGE_SHIFT ) ;
} while ( ret = = - EBUSY ) ;
return ret ;
}
/* quotas */
# ifdef CONFIG_BCACHEFS_QUOTA
2022-11-14 06:43:37 +03:00
static void __bch2_quota_reservation_put ( struct bch_fs * c ,
struct bch_inode_info * inode ,
struct quota_res * res )
2017-03-17 09:18:50 +03:00
{
BUG_ON ( res - > sectors > inode - > ei_quota_reserved ) ;
bch2_quota_acct ( c , inode - > ei_qid , Q_SPC ,
2018-11-01 22:10:01 +03:00
- ( ( s64 ) res - > sectors ) , KEY_TYPE_QUOTA_PREALLOC ) ;
2017-03-17 09:18:50 +03:00
inode - > ei_quota_reserved - = res - > sectors ;
res - > sectors = 0 ;
}
2022-11-14 06:43:37 +03:00
static void bch2_quota_reservation_put ( struct bch_fs * c ,
struct bch_inode_info * inode ,
struct quota_res * res )
{
if ( res - > sectors ) {
mutex_lock ( & inode - > ei_quota_lock ) ;
__bch2_quota_reservation_put ( c , inode , res ) ;
mutex_unlock ( & inode - > ei_quota_lock ) ;
}
}
2017-03-17 09:18:50 +03:00
static int bch2_quota_reservation_add ( struct bch_fs * c ,
struct bch_inode_info * inode ,
struct quota_res * res ,
2022-10-11 11:32:14 +03:00
u64 sectors ,
2017-03-17 09:18:50 +03:00
bool check_enospc )
{
int ret ;
mutex_lock ( & inode - > ei_quota_lock ) ;
ret = bch2_quota_acct ( c , inode - > ei_qid , Q_SPC , sectors ,
2018-11-01 22:10:01 +03:00
check_enospc ? KEY_TYPE_QUOTA_PREALLOC : KEY_TYPE_QUOTA_NOCHECK ) ;
2017-03-17 09:18:50 +03:00
if ( likely ( ! ret ) ) {
inode - > ei_quota_reserved + = sectors ;
res - > sectors + = sectors ;
}
mutex_unlock ( & inode - > ei_quota_lock ) ;
return ret ;
}
# else
2022-11-14 06:43:37 +03:00
/* Built without CONFIG_BCACHEFS_QUOTA: there is nothing to release. */
static void __bch2_quota_reservation_put(struct bch_fs *c,
					 struct bch_inode_info *inode,
					 struct quota_res *res)
{
}
2017-03-17 09:18:50 +03:00
/* Built without CONFIG_BCACHEFS_QUOTA: there is nothing to release. */
static void bch2_quota_reservation_put(struct bch_fs *c,
				       struct bch_inode_info *inode,
				       struct quota_res *res)
{
}
2017-03-17 09:18:50 +03:00
static int bch2_quota_reservation_add ( struct bch_fs * c ,
struct bch_inode_info * inode ,
struct quota_res * res ,
unsigned sectors ,
bool check_enospc )
{
return 0 ;
}
# endif
/* i_size updates: */
2018-07-17 21:12:42 +03:00
/* Argument bundle handed to inode_set_size() via bch2_write_inode(). */
struct inode_new_size {
	loff_t				new_size;	/* value for bi_size */
	u64				now;		/* timestamp to apply */
	unsigned			fields;		/* ATTR_[AMC]TIME mask */
};
2017-03-17 09:18:50 +03:00
static int inode_set_size ( struct bch_inode_info * inode ,
struct bch_inode_unpacked * bi ,
void * p )
{
2018-07-17 21:12:42 +03:00
struct inode_new_size * s = p ;
2017-03-17 09:18:50 +03:00
2018-07-17 21:12:42 +03:00
bi - > bi_size = s - > new_size ;
if ( s - > fields & ATTR_ATIME )
bi - > bi_atime = s - > now ;
if ( s - > fields & ATTR_MTIME )
bi - > bi_mtime = s - > now ;
if ( s - > fields & ATTR_CTIME )
bi - > bi_ctime = s - > now ;
2017-03-17 09:18:50 +03:00
return 0 ;
}
2019-08-16 16:59:56 +03:00
/*
 * Write @new_size to the inode through bch2_write_inode(), using
 * inode_set_size() as the update callback. @fields is both the ATTR_*
 * timestamp mask applied by the callback and the last argument passed to
 * bch2_write_inode(); timestamps are set to bch2_current_time().
 *
 * Returns whatever bch2_write_inode() returns.
 */
int __must_check bch2_write_inode_size(struct bch_fs *c,
				       struct bch_inode_info *inode,
				       loff_t new_size, unsigned fields)
{
	struct inode_new_size update;

	update.new_size	= new_size;
	update.now	= bch2_current_time(c);
	update.fields	= fields;

	return bch2_write_inode(c, inode, inode_set_size, &update, fields);
}
2022-11-14 06:43:37 +03:00
/*
 * Apply a signed change of @sectors to inode->v.i_blocks and update quota
 * accounting to match.
 *
 * No locking is done here; the i_sectors_acct() wrapper takes
 * inode->ei_quota_lock around this call.
 *
 * @quota_res may be NULL. With CONFIG_BCACHEFS_QUOTA, a positive @sectors
 * with a non-NULL @quota_res is paid for out of that reservation; every other
 * case is charged directly through bch2_quota_acct().
 */
static void __i_sectors_acct ( struct bch_fs * c , struct bch_inode_info * inode ,
2018-08-06 00:48:00 +03:00
struct quota_res * quota_res , s64 sectors )
2017-03-17 09:18:50 +03:00
{
2022-04-16 23:06:59 +03:00
/* Flag a filesystem inconsistency if the new i_blocks would be negative: */
bch2_fs_inconsistent_on ( ( s64 ) inode - > v . i_blocks + sectors < 0 , c ,
" inode %lu i_blocks underflow: %llu + %lli < 0 (ondisk %lli) " ,
inode - > v . i_ino , ( u64 ) inode - > v . i_blocks , sectors ,
inode - > ei_inode . bi_sectors ) ;
2021-11-24 01:05:56 +03:00
inode - > v . i_blocks + = sectors ;
2017-03-17 09:18:50 +03:00
# ifdef CONFIG_BCACHEFS_QUOTA
if ( quota_res & & sectors > 0 ) {
/* Consume from the reservation; it must cover the whole amount: */
BUG_ON ( sectors > quota_res - > sectors ) ;
BUG_ON ( sectors > inode - > ei_quota_reserved ) ;
quota_res - > sectors - = sectors ;
inode - > ei_quota_reserved - = sectors ;
} else {
2018-11-01 22:10:01 +03:00
/* No reservation to draw on (or a non-positive delta): account directly */
bch2_quota_acct ( c , inode - > ei_qid , Q_SPC , sectors , KEY_TYPE_QUOTA_WARN ) ;
2017-03-17 09:18:50 +03:00
}
# endif
2022-11-14 06:43:37 +03:00
}
/*
 * Locked wrapper around __i_sectors_acct(): applies @sectors under
 * inode->ei_quota_lock. A zero delta returns without taking the lock.
 */
static void i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode,
			   struct quota_res *quota_res, s64 sectors)
{
	if (!sectors)
		return;

	mutex_lock(&inode->ei_quota_lock);
	__i_sectors_acct(c, inode, quota_res, sectors);
	mutex_unlock(&inode->ei_quota_lock);
}
/* page state: */
/* stored in page->private: */
2019-07-03 04:41:35 +03:00
/* Per-sector bookkeeping kept for each sector of a cached page. */
struct bch_page_sector {
	/* Uncompressed, fully allocated replicas (or on disk reservation): */
	unsigned			nr_replicas:4;

	/* Owns PAGE_SECTORS * replicas_reserved sized in memory reservation: */
	unsigned			replicas_reserved:4;

	/* i_sectors: */
	enum {
		SECTOR_UNALLOCATED,
		SECTOR_RESERVED,
		SECTOR_DIRTY,
		SECTOR_DIRTY_RESERVED,
		SECTOR_ALLOCATED,
	}				state:8;
};
2017-03-17 09:18:50 +03:00
2019-07-03 04:41:35 +03:00
struct bch_page_state {
2019-10-09 16:19:06 +03:00
spinlock_t lock ;
2019-07-29 19:24:36 +03:00
atomic_t write_count ;
2021-11-24 02:17:04 +03:00
bool uptodate ;
2019-07-03 04:41:35 +03:00
struct bch_page_sector s [ PAGE_SECTORS ] ;
2017-03-17 09:18:50 +03:00
} ;
2019-07-03 04:41:35 +03:00
static inline struct bch_page_state * __bch2_page_state ( struct page * page )
2017-03-17 09:18:50 +03:00
{
2019-07-03 04:41:35 +03:00
return page_has_private ( page )
? ( struct bch_page_state * ) page_private ( page )
: NULL ;
}
2017-03-17 09:18:50 +03:00
2019-07-03 04:41:35 +03:00
/*
 * Return @page's bch_page_state, or NULL if it has none. The page is expected
 * to be locked (checked with EBUG_ON).
 */
static inline struct bch_page_state *bch2_page_state(struct page *page)
{
	struct bch_page_state *state;

	EBUG_ON(!PageLocked(page));

	state = __bch2_page_state(page);
	return state;
}
/*
 * Detach and free @page's private state without asserting the page lock.
 * For newly allocated pages.
 */
static void __bch2_page_state_release(struct page *page)
{
	void *state = detach_page_private(page);

	kfree(state);
}
/* Detach and free @page's private state; the page is expected to be locked. */
static void bch2_page_state_release(struct page *page)
{
	EBUG_ON(!PageLocked(page));

	__bch2_page_state_release(page);
}
/*
 * Allocate a zeroed bch_page_state with GFP_NOFS | @gfp, initialise its lock
 * and attach it to @page. Does not check for existing state or assert the
 * page lock; for newly allocated pages.
 *
 * Returns the new state, or NULL if the allocation failed.
 */
static struct bch_page_state *__bch2_page_state_create(struct page *page,
						       gfp_t gfp)
{
	struct bch_page_state *state = kzalloc(sizeof(*state), GFP_NOFS|gfp);

	if (state) {
		spin_lock_init(&state->lock);
		attach_page_private(page, state);
	}

	return state;
}
2019-07-03 04:41:35 +03:00
/*
 * Return @page's existing bch_page_state, creating one with @gfp if it has
 * none. Returns NULL only if a new state had to be allocated and that failed.
 */
static struct bch_page_state *bch2_page_state_create(struct page *page,
						     gfp_t gfp)
{
	struct bch_page_state *state = bch2_page_state(page);

	if (state)
		return state;

	return __bch2_page_state_create(page, gfp);
}
2022-11-14 02:59:01 +03:00
static unsigned bkey_to_sector_state ( struct bkey_s_c k )
2021-11-24 01:05:56 +03:00
{
2022-11-14 02:59:01 +03:00
if ( bkey_extent_is_reservation ( k ) )
2021-11-24 01:05:56 +03:00
return SECTOR_RESERVED ;
2022-11-14 02:59:01 +03:00
if ( bkey_extent_is_allocation ( k . k ) )
2021-11-24 01:05:56 +03:00
return SECTOR_ALLOCATED ;
return SECTOR_UNALLOCATED ;
}
2021-11-24 02:17:04 +03:00
static void __bch2_page_state_set ( struct page * page ,
unsigned pg_offset , unsigned pg_len ,
unsigned nr_ptrs , unsigned state )
{
struct bch_page_state * s = bch2_page_state_create ( page , __GFP_NOFAIL ) ;
unsigned i ;
BUG_ON ( pg_offset > = PAGE_SECTORS ) ;
BUG_ON ( pg_offset + pg_len > PAGE_SECTORS ) ;
spin_lock ( & s - > lock ) ;
for ( i = pg_offset ; i < pg_offset + pg_len ; i + + ) {
s - > s [ i ] . nr_replicas = nr_ptrs ;
s - > s [ i ] . state = state ;
}
if ( i = = PAGE_SECTORS )
s - > uptodate = true ;
spin_unlock ( & s - > lock ) ;
}
/*
 * Fill in the per-sector page state for @nr_pages pages in @pages from the
 * extents btree.
 *
 * Walks extent keys for inode @inum, starting at the sector offset of
 * pages[0], in the snapshot looked up for inum.subvol. Each key is applied to
 * every page it overlaps via __bch2_page_state_set(), skipping pages whose
 * state is already marked uptodate.
 *
 * Returns 0 on success or the error from the snapshot lookup / btree
 * iteration. Transaction restarts are retried internally.
 */
static int bch2_page_state_set ( struct bch_fs * c , subvol_inum inum ,
struct page * * pages , unsigned nr_pages )
{
struct btree_trans trans ;
struct btree_iter iter ;
struct bkey_s_c k ;
/* Page index converted to a sector offset: */
u64 offset = pages [ 0 ] - > index < < PAGE_SECTORS_SHIFT ;
unsigned pg_idx = 0 ;
u32 snapshot ;
int ret ;
bch2_trans_init ( & trans , c , 0 , 0 ) ;
retry :
bch2_trans_begin ( & trans ) ;
ret = bch2_subvolume_get_snapshot ( & trans , inum . subvol , & snapshot ) ;
if ( ret )
goto err ;
for_each_btree_key_norestart ( & trans , iter , BTREE_ID_extents ,
SPOS ( inum . inum , offset , snapshot ) ,
BTREE_ITER_SLOTS , k , ret ) {
unsigned nr_ptrs = bch2_bkey_nr_ptrs_fully_allocated ( k ) ;
2022-11-14 02:59:01 +03:00
unsigned state = bkey_to_sector_state ( k ) ;
2021-11-24 02:17:04 +03:00
/* Apply this key to each remaining page it overlaps: */
while ( pg_idx < nr_pages ) {
struct page * page = pages [ pg_idx ] ;
u64 pg_start = page - > index < < PAGE_SECTORS_SHIFT ;
u64 pg_end = ( page - > index + 1 ) < < PAGE_SECTORS_SHIFT ;
/* Portion of the key inside this page, as page-relative sectors: */
unsigned pg_offset = max ( bkey_start_offset ( k . k ) , pg_start ) - pg_start ;
unsigned pg_len = min ( k . k - > p . offset , pg_end ) - pg_offset - pg_start ;
BUG_ON ( k . k - > p . offset < pg_start ) ;
BUG_ON ( bkey_start_offset ( k . k ) > pg_end ) ;
if ( ! bch2_page_state_create ( page , __GFP_NOFAIL ) - > uptodate )
__bch2_page_state_set ( page , pg_offset , pg_len , nr_ptrs , state ) ;
/* Key ends inside this page: the next key continues with the same page */
if ( k . k - > p . offset < pg_end )
break ;
pg_idx + + ;
}
if ( pg_idx = = nr_pages )
break ;
}
/* Remember the iterator position so a retry starts the walk from there: */
offset = iter . pos . offset ;
bch2_trans_iter_exit ( & trans , & iter ) ;
err :
2022-07-18 06:06:38 +03:00
if ( bch2_err_matches ( ret , BCH_ERR_transaction_restart ) )
2021-11-24 02:17:04 +03:00
goto retry ;
bch2_trans_exit ( & trans ) ;
return ret ;
}
2021-11-24 01:05:56 +03:00
static void bch2_bio_page_state_set ( struct bio * bio , struct bkey_s_c k )
{
struct bvec_iter iter ;
struct bio_vec bv ;
unsigned nr_ptrs = k . k - > type = = KEY_TYPE_reflink_v
? 0 : bch2_bkey_nr_ptrs_fully_allocated ( k ) ;
2022-11-14 02:59:01 +03:00
unsigned state = bkey_to_sector_state ( k ) ;
2021-11-24 01:05:56 +03:00
2021-11-24 02:17:04 +03:00
bio_for_each_segment ( bv , bio , iter )
__bch2_page_state_set ( bv . bv_page , bv . bv_offset > > 9 ,
bv . bv_len > > 9 , nr_ptrs , state ) ;
2021-11-24 01:05:56 +03:00
}
2021-11-24 02:21:09 +03:00
/*
 * Zero nr_replicas for every sector in [start, end) (sector offsets) that is
 * tracked in the inode's page cache. Folios without page state are skipped.
 * An empty range is a no-op.
 */
static void mark_pagecache_unallocated(struct bch_inode_info *inode,
				       u64 start, u64 end)
{
	struct folio_batch fbatch;
	pgoff_t index, end_index;
	unsigned nr, sector;

	if (end <= start)
		return;

	index		= start >> PAGE_SECTORS_SHIFT;
	end_index	= (end - 1) >> PAGE_SECTORS_SHIFT;

	folio_batch_init(&fbatch);

	while (filemap_get_folios(inode->v.i_mapping,
				  &index, end_index, &fbatch)) {
		for (nr = 0; nr < folio_batch_count(&fbatch); nr++) {
			struct folio *folio = fbatch.folios[nr];
			u64 folio_start = folio->index << PAGE_SECTORS_SHIFT;
			u64 folio_end = (folio->index + 1) << PAGE_SECTORS_SHIFT;
			/* part of [start, end) inside this folio, folio-relative */
			unsigned first = max(start, folio_start) - folio_start;
			unsigned count = min(end, folio_end) - first - folio_start;
			struct bch_page_state *s;

			BUG_ON(end <= folio_start);
			BUG_ON(first >= PAGE_SECTORS);
			BUG_ON(first + count > PAGE_SECTORS);

			folio_lock(folio);

			s = bch2_page_state(&folio->page);
			if (s) {
				spin_lock(&s->lock);
				for (sector = first; sector < first + count; sector++)
					s->s[sector].nr_replicas = 0;
				spin_unlock(&s->lock);
			}

			folio_unlock(folio);
		}

		folio_batch_release(&fbatch);
		cond_resched();
	}
}
/*
 * Mark the cached sectors in [start, end) (sector offsets) as reserved:
 * SECTOR_UNALLOCATED becomes SECTOR_RESERVED and SECTOR_DIRTY becomes
 * SECTOR_DIRTY_RESERVED. Each dirty sector converted lowers the inode's
 * sector count by one, applied in a single i_sectors_acct() call at the end.
 * Folios without page state are skipped; an empty range is a no-op.
 */
static void mark_pagecache_reserved(struct bch_inode_info *inode,
				    u64 start, u64 end)
{
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	struct folio_batch fbatch;
	pgoff_t index, end_index;
	s64 i_sectors_delta = 0;
	unsigned nr, sector;

	if (end <= start)
		return;

	index		= start >> PAGE_SECTORS_SHIFT;
	end_index	= (end - 1) >> PAGE_SECTORS_SHIFT;

	folio_batch_init(&fbatch);

	while (filemap_get_folios(inode->v.i_mapping,
				  &index, end_index, &fbatch)) {
		for (nr = 0; nr < folio_batch_count(&fbatch); nr++) {
			struct folio *folio = fbatch.folios[nr];
			u64 folio_start = folio->index << PAGE_SECTORS_SHIFT;
			u64 folio_end = (folio->index + 1) << PAGE_SECTORS_SHIFT;
			/* part of [start, end) inside this folio, folio-relative */
			unsigned first = max(start, folio_start) - folio_start;
			unsigned count = min(end, folio_end) - first - folio_start;
			struct bch_page_state *s;

			BUG_ON(end <= folio_start);
			BUG_ON(first >= PAGE_SECTORS);
			BUG_ON(first + count > PAGE_SECTORS);

			folio_lock(folio);

			s = bch2_page_state(&folio->page);
			if (s) {
				spin_lock(&s->lock);
				for (sector = first; sector < first + count; sector++) {
					struct bch_page_sector *sec = &s->s[sector];

					if (sec->state == SECTOR_UNALLOCATED) {
						sec->state = SECTOR_RESERVED;
					} else if (sec->state == SECTOR_DIRTY) {
						sec->state = SECTOR_DIRTY_RESERVED;
						i_sectors_delta--;
					}
				}
				spin_unlock(&s->lock);
			}

			folio_unlock(folio);
		}

		folio_batch_release(&fbatch);
		cond_resched();
	}

	i_sectors_acct(c, inode, NULL, i_sectors_delta);
}
2019-07-02 21:59:15 +03:00
static inline unsigned inode_nr_replicas ( struct bch_fs * c , struct bch_inode_info * inode )
{
/* XXX: this should not be open coded */
return inode - > ei_inode . bi_data_replicas
? inode - > ei_inode . bi_data_replicas - 1
: c - > opts . data_replicas ;
}
2019-07-03 04:41:35 +03:00
static inline unsigned sectors_to_reserve ( struct bch_page_sector * s ,
unsigned nr_replicas )
2017-03-17 09:18:50 +03:00
{
2019-07-03 04:41:35 +03:00
return max ( 0 , ( int ) nr_replicas -
s - > nr_replicas -
s - > replicas_reserved ) ;
}
/*
 * Take a disk reservation covering every sector of @page that is short of
 * the inode's replica count, and record it in the page's per-sector
 * replicas_reserved counters.
 *
 * When @check_enospc is false the reservation is requested with
 * BCH_DISK_RESERVATION_NOFAIL.
 *
 * Returns 0 on success (including when nothing needed reserving), -ENOMEM if
 * page state could not be allocated, or the error from
 * bch2_disk_reservation_get().
 */
static int bch2_get_page_disk_reservation(struct bch_fs *c,
					  struct bch_inode_info *inode,
					  struct page *page, bool check_enospc)
{
	struct bch_page_state *s = bch2_page_state_create(page, 0);
	unsigned nr_replicas = inode_nr_replicas(c, inode);
	struct disk_reservation disk_res = { 0 };
	unsigned sector, needed = 0;
	int ret;

	if (!s)
		return -ENOMEM;

	for (sector = 0; sector < ARRAY_SIZE(s->s); sector++)
		needed += sectors_to_reserve(&s->s[sector], nr_replicas);

	if (!needed)
		return 0;

	ret = bch2_disk_reservation_get(c, &disk_res, needed, 1,
					check_enospc
					? 0
					: BCH_DISK_RESERVATION_NOFAIL);
	if (unlikely(ret))
		return ret;

	/* Hand the reservation over to the individual sectors: */
	for (sector = 0; sector < ARRAY_SIZE(s->s); sector++) {
		struct bch_page_sector *sec = &s->s[sector];

		sec->replicas_reserved += sectors_to_reserve(sec, nr_replicas);
	}

	return 0;
}
2019-07-29 20:38:38 +03:00
/* A disk reservation and a quota reservation that are taken/released together. */
struct bch2_page_reservation {
	struct disk_reservation		disk;
	struct quota_res		quota;
};
static void bch2_page_reservation_init ( struct bch_fs * c ,
2019-07-03 04:41:35 +03:00
struct bch_inode_info * inode ,
2019-07-29 20:38:38 +03:00
struct bch2_page_reservation * res )
{
memset ( res , 0 , sizeof ( * res ) ) ;
res - > disk . nr_replicas = inode_nr_replicas ( c , inode ) ;
}
static void bch2_page_reservation_put ( struct bch_fs * c ,
struct bch_inode_info * inode ,
struct bch2_page_reservation * res )
{
bch2_disk_reservation_put ( c , & res - > disk ) ;
bch2_quota_reservation_put ( c , inode , & res - > quota ) ;
}
static int bch2_page_reservation_get ( struct bch_fs * c ,
struct bch_inode_info * inode , struct page * page ,
struct bch2_page_reservation * res ,
2022-10-15 10:52:28 +03:00
unsigned offset , unsigned len )
2019-07-03 04:41:35 +03:00
{
struct bch_page_state * s = bch2_page_state_create ( page , 0 ) ;
2019-07-29 20:38:38 +03:00
unsigned i , disk_sectors = 0 , quota_sectors = 0 ;
2018-11-15 05:53:40 +03:00
int ret ;
2017-03-17 09:18:50 +03:00
2019-07-03 04:41:35 +03:00
if ( ! s )
return - ENOMEM ;
2017-03-17 09:18:50 +03:00
2021-11-24 02:17:04 +03:00
BUG_ON ( ! s - > uptodate ) ;
2019-08-22 03:16:42 +03:00
for ( i = round_down ( offset , block_bytes ( c ) ) > > 9 ;
i < round_up ( offset + len , block_bytes ( c ) ) > > 9 ;
2019-07-29 20:38:38 +03:00
i + + ) {
disk_sectors + = sectors_to_reserve ( & s - > s [ i ] ,
res - > disk . nr_replicas ) ;
quota_sectors + = s - > s [ i ] . state = = SECTOR_UNALLOCATED ;
}
2017-03-17 09:18:50 +03:00
2019-07-29 20:38:38 +03:00
if ( disk_sectors ) {
2022-10-15 10:52:28 +03:00
ret = bch2_disk_reservation_add ( c , & res - > disk , disk_sectors , 0 ) ;
2019-07-29 20:38:38 +03:00
if ( unlikely ( ret ) )
return ret ;
}
2017-03-17 09:18:50 +03:00
2019-07-29 20:38:38 +03:00
if ( quota_sectors ) {
ret = bch2_quota_reservation_add ( c , inode , & res - > quota ,
2022-10-15 10:52:28 +03:00
quota_sectors , true ) ;
2019-07-29 20:38:38 +03:00
if ( unlikely ( ret ) ) {
struct disk_reservation tmp = {
. sectors = disk_sectors
} ;
bch2_disk_reservation_put ( c , & tmp ) ;
res - > disk . sectors - = disk_sectors ;
return ret ;
}
}
2017-03-17 09:18:50 +03:00
2019-07-03 00:25:05 +03:00
return 0 ;
2017-03-17 09:18:50 +03:00
}
static void bch2_clear_page_bits ( struct page * page )
{
struct bch_inode_info * inode = to_bch_ei ( page - > mapping - > host ) ;
struct bch_fs * c = inode - > v . i_sb - > s_fs_info ;
2019-07-03 04:41:35 +03:00
struct bch_page_state * s = bch2_page_state ( page ) ;
2019-07-29 20:38:38 +03:00
struct disk_reservation disk_res = { 0 } ;
2019-07-03 04:41:35 +03:00
int i , dirty_sectors = 0 ;
2018-11-15 05:53:40 +03:00
2019-07-03 04:41:35 +03:00
if ( ! s )
2017-03-17 09:18:50 +03:00
return ;
2019-10-09 16:19:06 +03:00
EBUG_ON ( ! PageLocked ( page ) ) ;
EBUG_ON ( PageWriteback ( page ) ) ;
2019-07-03 04:41:35 +03:00
for ( i = 0 ; i < ARRAY_SIZE ( s - > s ) ; i + + ) {
2019-07-29 20:38:38 +03:00
disk_res . sectors + = s - > s [ i ] . replicas_reserved ;
s - > s [ i ] . replicas_reserved = 0 ;
2021-11-24 01:05:56 +03:00
switch ( s - > s [ i ] . state ) {
case SECTOR_DIRTY :
2019-07-03 04:41:35 +03:00
s - > s [ i ] . state = SECTOR_UNALLOCATED ;
2021-11-24 01:05:56 +03:00
- - dirty_sectors ;
break ;
case SECTOR_DIRTY_RESERVED :
s - > s [ i ] . state = SECTOR_RESERVED ;
break ;
default :
break ;
2019-07-03 04:41:35 +03:00
}
}
2017-03-17 09:18:50 +03:00
2019-07-29 20:38:38 +03:00
bch2_disk_reservation_put ( c , & disk_res ) ;
2021-11-24 02:21:09 +03:00
i_sectors_acct ( c , inode , NULL , dirty_sectors ) ;
2017-03-17 09:18:50 +03:00
2019-07-03 04:41:35 +03:00
bch2_page_state_release ( page ) ;
2017-03-17 09:18:50 +03:00
}
2019-07-29 20:38:38 +03:00
static void bch2_set_page_dirty ( struct bch_fs * c ,
struct bch_inode_info * inode , struct page * page ,
struct bch2_page_reservation * res ,
unsigned offset , unsigned len )
2017-03-17 09:18:50 +03:00
{
2019-07-29 20:38:38 +03:00
struct bch_page_state * s = bch2_page_state ( page ) ;
2019-07-03 04:41:35 +03:00
unsigned i , dirty_sectors = 0 ;
2017-03-17 09:18:50 +03:00
2019-09-27 02:09:08 +03:00
WARN_ON ( ( u64 ) page_offset ( page ) + offset + len >
round_up ( ( u64 ) i_size_read ( & inode - > v ) , block_bytes ( c ) ) ) ;
2019-09-24 20:33:11 +03:00
2019-10-09 16:19:06 +03:00
spin_lock ( & s - > lock ) ;
2019-08-22 03:16:42 +03:00
for ( i = round_down ( offset , block_bytes ( c ) ) > > 9 ;
i < round_up ( offset + len , block_bytes ( c ) ) > > 9 ;
2019-07-29 20:38:38 +03:00
i + + ) {
unsigned sectors = sectors_to_reserve ( & s - > s [ i ] ,
res - > disk . nr_replicas ) ;
2017-03-17 09:18:50 +03:00
2019-10-26 01:54:58 +03:00
/*
* This can happen if we race with the error path in
* bch2_writepage_io_done ( ) :
*/
sectors = min_t ( unsigned , sectors , res - > disk . sectors ) ;
2019-07-29 20:38:38 +03:00
s - > s [ i ] . replicas_reserved + = sectors ;
res - > disk . sectors - = sectors ;
2019-07-03 00:25:05 +03:00
2021-11-24 01:05:56 +03:00
switch ( s - > s [ i ] . state ) {
case SECTOR_UNALLOCATED :
s - > s [ i ] . state = SECTOR_DIRTY ;
2019-07-03 04:41:35 +03:00
dirty_sectors + + ;
2021-11-24 01:05:56 +03:00
break ;
case SECTOR_RESERVED :
s - > s [ i ] . state = SECTOR_DIRTY_RESERVED ;
break ;
default :
break ;
}
2019-07-03 04:41:35 +03:00
}
2019-10-09 16:19:06 +03:00
spin_unlock ( & s - > lock ) ;
2021-11-24 02:21:09 +03:00
i_sectors_acct ( c , inode , & res - > quota , dirty_sectors ) ;
2019-07-02 21:59:15 +03:00
2019-07-29 20:38:38 +03:00
if ( ! PageDirty ( page ) )
filemap_dirty_folio ( inode - > v . i_mapping , page_folio ( page ) ) ;
2017-03-17 09:18:50 +03:00
}
/*
 * Page fault handler: wraps filemap_fault() in the inode's pagecache "add"
 * lock, with special handling for faults taken while the current task has a
 * faults-disabled mapping set (see faults_disabled_mapping()).
 *
 * Returns filemap_fault()'s result, or VM_FAULT_SIGBUS when the fault cannot
 * be serviced from this context (see the two early returns below).
 */
vm_fault_t bch2_page_fault ( struct vm_fault * vmf )
{
struct file * file = vmf - > vma - > vm_file ;
2020-11-11 20:33:12 +03:00
struct address_space * mapping = file - > f_mapping ;
struct address_space * fdm = faults_disabled_mapping ( ) ;
2017-03-17 09:18:50 +03:00
struct bch_inode_info * inode = file_bch_inode ( file ) ;
int ret ;
2020-11-11 20:33:12 +03:00
/* Fault is on the very mapping the current task has faults disabled for: */
if ( fdm = = mapping )
return VM_FAULT_SIGBUS ;
/* Lock ordering: */
/* (mappings are ordered by address; only the fdm > mapping case needs the trylock dance) */
if ( fdm > mapping ) {
struct bch_inode_info * fdm_host = to_bch_ei ( fdm - > host ) ;
2022-11-04 20:25:57 +03:00
if ( bch2_pagecache_add_tryget ( inode ) )
2020-11-11 20:33:12 +03:00
goto got_lock ;
/*
 * Trylock failed: drop the block lock held on the faults-disabled
 * mapping's inode, take and immediately release this inode's add lock,
 * then retake the block lock.
 */
2022-11-04 20:25:57 +03:00
bch2_pagecache_block_put ( fdm_host ) ;
2020-11-11 20:33:12 +03:00
2022-11-04 20:25:57 +03:00
bch2_pagecache_add_get ( inode ) ;
bch2_pagecache_add_put ( inode ) ;
2020-11-11 20:33:12 +03:00
2022-11-04 20:25:57 +03:00
bch2_pagecache_block_get ( fdm_host ) ;
2020-11-11 20:33:12 +03:00
/* Signal that lock has been dropped: */
set_fdm_dropped_locks ( ) ;
/* The fault itself is not serviced on this path: */
return VM_FAULT_SIGBUS ;
}
2022-11-04 20:25:57 +03:00
bch2_pagecache_add_get ( inode ) ;
2020-11-11 20:33:12 +03:00
got_lock :
2017-03-17 09:18:50 +03:00
ret = filemap_fault ( vmf ) ;
2022-11-04 20:25:57 +03:00
bch2_pagecache_add_put ( inode ) ;
2017-03-17 09:18:50 +03:00
return ret ;
}
vm_fault_t bch2_page_mkwrite ( struct vm_fault * vmf )
{
struct page * page = vmf - > page ;
struct file * file = vmf - > vma - > vm_file ;
struct bch_inode_info * inode = file_bch_inode ( file ) ;
struct address_space * mapping = file - > f_mapping ;
struct bch_fs * c = inode - > v . i_sb - > s_fs_info ;
2019-07-29 20:38:38 +03:00
struct bch2_page_reservation res ;
2019-09-20 01:05:04 +03:00
unsigned len ;
loff_t isize ;
2021-11-24 02:17:04 +03:00
int ret ;
2017-03-17 09:18:50 +03:00
2019-07-29 20:38:38 +03:00
bch2_page_reservation_init ( c , inode , & res ) ;
2017-03-17 09:18:50 +03:00
sb_start_pagefault ( inode - > v . i_sb ) ;
file_update_time ( file ) ;
/*
* Not strictly necessary , but helps avoid dio writes livelocking in
* write_invalidate_inode_pages_range ( ) - can drop this if / when we get
* a write_invalidate_inode_pages_range ( ) that works without dropping
* page lock before invalidating page
*/
2022-11-04 20:25:57 +03:00
bch2_pagecache_add_get ( inode ) ;
2017-03-17 09:18:50 +03:00
lock_page ( page ) ;
2019-09-20 01:05:04 +03:00
isize = i_size_read ( & inode - > v ) ;
if ( page - > mapping ! = mapping | | page_offset ( page ) > = isize ) {
2017-03-17 09:18:50 +03:00
unlock_page ( page ) ;
ret = VM_FAULT_NOPAGE ;
goto out ;
}
2019-09-25 22:26:14 +03:00
len = min_t ( loff_t , PAGE_SIZE , isize - page_offset ( page ) ) ;
2019-09-20 01:05:04 +03:00
2021-11-24 02:17:04 +03:00
if ( ! bch2_page_state_create ( page , __GFP_NOFAIL ) - > uptodate ) {
if ( bch2_page_state_set ( c , inode_inum ( inode ) , & page , 1 ) ) {
unlock_page ( page ) ;
ret = VM_FAULT_SIGBUS ;
goto out ;
}
}
2022-10-15 10:52:28 +03:00
if ( bch2_page_reservation_get ( c , inode , page , & res , 0 , len ) ) {
2017-03-17 09:18:50 +03:00
unlock_page ( page ) ;
ret = VM_FAULT_SIGBUS ;
goto out ;
}
2019-09-20 01:05:04 +03:00
bch2_set_page_dirty ( c , inode , page , & res , 0 , len ) ;
2019-10-19 01:24:26 +03:00
bch2_page_reservation_put ( c , inode , & res ) ;
2017-03-17 09:18:50 +03:00
wait_for_stable_page ( page ) ;
2021-11-24 02:17:04 +03:00
ret = VM_FAULT_LOCKED ;
2017-03-17 09:18:50 +03:00
out :
2022-11-04 20:25:57 +03:00
bch2_pagecache_add_put ( inode ) ;
2017-03-17 09:18:50 +03:00
sb_end_pagefault ( inode - > v . i_sb ) ;
2019-07-29 20:38:38 +03:00
2017-03-17 09:18:50 +03:00
return ret ;
}
/*
 * ->invalidate_folio handler: drop the bcachefs page state, but only when the
 * whole folio is being invalidated; partial invalidations are ignored.
 */
void bch2_invalidate_folio(struct folio *folio, size_t offset, size_t length)
{
	if (!offset && length >= folio_size(folio))
		bch2_clear_page_bits(&folio->page);
}
/*
 * ->release_folio handler: refuse (return false) while the folio is dirty or
 * under writeback; otherwise drop the bcachefs page state and return true.
 */
bool bch2_release_folio(struct folio *folio, gfp_t gfp_mask)
{
	bool busy = folio_test_dirty(folio) || folio_test_writeback(folio);

	if (busy)
		return false;

	bch2_clear_page_bits(&folio->page);
	return true;
}
/* readpage(s): */
/*
 * Read completion: for every page in @bio, set it uptodate if the bio
 * succeeded, or clear uptodate and flag an error if it failed; then unlock
 * the page. Finally drop the bio reference.
 */
static void bch2_readpages_end_io(struct bio *bio)
{
	struct bvec_iter_all iter;
	struct bio_vec *bv;

	bio_for_each_segment_all(bv, bio, iter) {
		struct page *page = bv->bv_page;

		if (bio->bi_status) {
			ClearPageUptodate(page);
			SetPageError(page);
		} else {
			SetPageUptodate(page);
		}

		unlock_page(page);
	}

	bio_put(bio);
}
/* Cursor over the pages of one readahead request. */
struct readpages_iter {
	struct address_space		*mapping;
	struct page			**pages;	/* array of nr_pages entries */
	unsigned			nr_pages;
	unsigned			idx;		/* next entry to hand out */
	pgoff_t				offset;		/* page index of pages[0] */
};
static int readpages_iter_init ( struct readpages_iter * iter ,
struct readahead_control * ractl )
{
unsigned i , nr_pages = readahead_count ( ractl ) ;
memset ( iter , 0 , sizeof ( * iter ) ) ;
iter - > mapping = ractl - > mapping ;
iter - > offset = readahead_index ( ractl ) ;
iter - > nr_pages = nr_pages ;
iter - > pages = kmalloc_array ( nr_pages , sizeof ( struct page * ) , GFP_NOFS ) ;
if ( ! iter - > pages )
return - ENOMEM ;
2020-11-30 00:00:47 +03:00
nr_pages = __readahead_batch ( ractl , iter - > pages , nr_pages ) ;
2017-03-17 09:18:50 +03:00
for ( i = 0 ; i < nr_pages ; i + + ) {
2019-07-03 04:41:35 +03:00
__bch2_page_state_create ( iter - > pages [ i ] , __GFP_NOFAIL ) ;
2017-03-17 09:18:50 +03:00
put_page ( iter - > pages [ i ] ) ;
}
return 0 ;
}
static inline struct page * readpage_iter_next ( struct readpages_iter * iter )
{
if ( iter - > idx > = iter - > nr_pages )
return NULL ;
EBUG_ON ( iter - > pages [ iter - > idx ] - > index ! = iter - > offset + iter - > idx ) ;
return iter - > pages [ iter - > idx ] ;
}
2019-11-10 00:01:15 +03:00
static bool extent_partial_reads_expensive ( struct bkey_s_c k )
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c ( k ) ;
struct bch_extent_crc_unpacked crc ;
const union bch_extent_entry * i ;
bkey_for_each_crc ( k . k , ptrs , crc , i )
if ( crc . csum_type | | crc . compression_type )
return true ;
return false ;
}
2017-03-17 09:18:50 +03:00
static void readpage_bio_extend ( struct readpages_iter * iter ,
2019-08-16 16:59:56 +03:00
struct bio * bio ,
unsigned sectors_this_extent ,
2017-03-17 09:18:50 +03:00
bool get_more )
{
2019-08-16 16:59:56 +03:00
while ( bio_sectors ( bio ) < sectors_this_extent & &
2017-03-17 09:18:50 +03:00
bio - > bi_vcnt < bio - > bi_max_vecs ) {
2021-11-24 04:00:34 +03:00
pgoff_t page_offset = bio_end_sector ( bio ) > > PAGE_SECTORS_SHIFT ;
2017-03-17 09:18:50 +03:00
struct page * page = readpage_iter_next ( iter ) ;
int ret ;
if ( page ) {
if ( iter - > offset + iter - > idx ! = page_offset )
break ;
iter - > idx + + ;
} else {
if ( ! get_more )
break ;
page = xa_load ( & iter - > mapping - > i_pages , page_offset ) ;
if ( page & & ! xa_is_value ( page ) )
break ;
page = __page_cache_alloc ( readahead_gfp_mask ( iter - > mapping ) ) ;
if ( ! page )
break ;
2019-07-03 04:41:35 +03:00
if ( ! __bch2_page_state_create ( page , 0 ) ) {
put_page ( page ) ;
break ;
}
2017-03-17 09:18:50 +03:00
ret = add_to_page_cache_lru ( page , iter - > mapping ,
page_offset , GFP_NOFS ) ;
if ( ret ) {
2019-07-03 04:41:35 +03:00
__bch2_page_state_release ( page ) ;
2017-03-17 09:18:50 +03:00
put_page ( page ) ;
break ;
}
put_page ( page ) ;
}
2019-07-04 02:27:42 +03:00
BUG_ON ( ! bio_add_page ( bio , page , PAGE_SIZE , 0 ) ) ;
2017-03-17 09:18:50 +03:00
}
}
2021-03-13 04:30:39 +03:00
/*
 * bchfs_read - look up the extents backing @rbio and issue reads for them.
 *
 * @trans:          btree transaction used for all lookups; restarted in place
 *                  (via the retry label) on a transaction-restart error.
 * @rbio:           read bio; bi_sector must already hold the starting sector.
 * @inum:           subvolume/inode pair identifying the file being read.
 * @readpages_iter: when non-NULL, passed to readpage_bio_extend() before each
 *                  fragment is issued; NULL for the single page path.
 *
 * The bio is consumed fragment by fragment: each loop iteration looks up the
 * extent slot at the bio's current sector, resolves indirect extents, and
 * hands one fragment to bch2_read_extent().  On a lookup error that is not a
 * transaction restart the error is logged, the bio is marked BLK_STS_IOERR
 * and completed here.
 */
static void bchfs_read ( struct btree_trans * trans ,
struct bch_read_bio * rbio ,
subvol_inum inum ,
2017-03-17 09:18:50 +03:00
struct readpages_iter * readpages_iter )
{
2019-03-28 05:03:30 +03:00
struct bch_fs * c = trans - > c ;
2021-03-13 04:30:39 +03:00
struct btree_iter iter ;
2020-12-17 23:08:58 +03:00
struct bkey_buf sk ;
2017-03-17 09:18:50 +03:00
int flags = BCH_READ_RETRY_IF_STALE |
BCH_READ_MAY_PROMOTE ;
2021-03-13 04:30:39 +03:00
u32 snapshot ;
2019-08-16 16:59:56 +03:00
int ret = 0 ;
2017-03-17 09:18:50 +03:00
rbio - > c = c ;
rbio - > start_time = local_clock ( ) ;
2021-03-13 04:30:39 +03:00
rbio - > subvol = inum . subvol ;
2019-11-10 00:01:15 +03:00
2020-12-17 23:08:58 +03:00
bch2_bkey_buf_init ( & sk ) ;
2019-08-16 16:59:56 +03:00
retry :
2021-07-25 03:24:10 +03:00
bch2_trans_begin ( trans ) ;
2021-03-13 04:30:39 +03:00
/*
 * Reset the iterator so the exit call under the err label sees a
 * zeroed iterator if we bail out before bch2_trans_iter_init().
 */
iter = ( struct btree_iter ) { NULL } ;
2021-07-25 03:24:10 +03:00
2021-03-13 04:30:39 +03:00
ret = bch2_subvolume_get_snapshot ( trans , inum . subvol , & snapshot ) ;
if ( ret )
goto err ;
bch2_trans_iter_init ( trans , & iter , BTREE_ID_extents ,
SPOS ( inum . inum , rbio - > bio . bi_iter . bi_sector , snapshot ) ,
2022-01-05 02:24:55 +03:00
BTREE_ITER_SLOTS ) ;
2017-03-17 09:18:50 +03:00
while ( 1 ) {
struct bkey_s_c k ;
2019-08-16 16:59:56 +03:00
unsigned bytes , sectors , offset_into_extent ;
2021-03-15 04:30:08 +03:00
enum btree_id data_btree = BTREE_ID_extents ;
2017-03-17 09:18:50 +03:00
2021-08-05 20:02:39 +03:00
/*
 * read_extent -> io_time_reset may cause a transaction restart
 * without returning an error, we need to check for that here:
 */
2022-07-18 06:06:38 +03:00
ret = bch2_trans_relock ( trans ) ;
if ( ret )
2021-08-05 20:02:39 +03:00
break ;
2021-03-13 04:30:39 +03:00
/* Reposition at the bio's current sector (advanced by bio_advance() below). */
bch2_btree_iter_set_pos ( & iter ,
POS ( inum . inum , rbio - > bio . bi_iter . bi_sector ) ) ;
2017-03-17 09:18:50 +03:00
2021-03-13 04:30:39 +03:00
k = bch2_btree_iter_peek_slot ( & iter ) ;
2019-08-16 16:59:56 +03:00
ret = bkey_err ( k ) ;
if ( ret )
break ;
2017-03-17 09:18:50 +03:00
2021-03-13 04:30:39 +03:00
/* Sectors remaining in this key starting from the iterator position. */
offset_into_extent = iter . pos . offset -
2019-07-09 19:56:43 +03:00
bkey_start_offset ( k . k ) ;
2019-08-16 16:59:56 +03:00
sectors = k . k - > size - offset_into_extent ;
2020-12-17 23:08:58 +03:00
/* Copy the key into sk; k is re-pointed at that copy below. */
bch2_bkey_buf_reassemble ( & sk , c , k ) ;
2020-10-25 03:56:47 +03:00
2021-03-15 04:30:08 +03:00
ret = bch2_read_indirect_extent ( trans , & data_btree ,
2020-05-22 17:50:05 +03:00
& offset_into_extent , & sk ) ;
2019-08-16 16:59:56 +03:00
if ( ret )
break ;
2020-10-25 03:56:47 +03:00
k = bkey_i_to_s_c ( sk . k ) ;
2019-08-16 16:59:56 +03:00
/* sk may have been replaced by the indirect lookup; clamp to its size too. */
sectors = min ( sectors , k . k - > size - offset_into_extent ) ;
2019-11-10 00:01:15 +03:00
if ( readpages_iter )
readpage_bio_extend ( readpages_iter , & rbio - > bio , sectors ,
extent_partial_reads_expensive ( k ) ) ;
2017-03-17 09:18:50 +03:00
2019-08-16 16:59:56 +03:00
/*
 * Temporarily shrink the bio to this fragment: after the swap,
 * bi_size is the fragment length and bytes holds the bio's full
 * remaining size.  If the two are equal this fragment finishes the bio.
 */
bytes = min ( sectors , bio_sectors ( & rbio - > bio ) ) < < 9 ;
2019-07-09 19:56:43 +03:00
swap ( rbio - > bio . bi_iter . bi_size , bytes ) ;
2017-03-17 09:18:50 +03:00
2019-07-09 19:56:43 +03:00
if ( rbio - > bio . bi_iter . bi_size = = bytes )
2017-03-17 09:18:50 +03:00
flags | = BCH_READ_LAST_FRAGMENT ;
2021-11-24 01:05:56 +03:00
bch2_bio_page_state_set ( & rbio - > bio , k ) ;
2017-03-17 09:18:50 +03:00
2021-03-13 04:30:39 +03:00
bch2_read_extent ( trans , rbio , iter . pos ,
2021-03-15 04:30:08 +03:00
data_btree , k , offset_into_extent , flags ) ;
2017-03-17 09:18:50 +03:00
if ( flags & BCH_READ_LAST_FRAGMENT )
2019-11-10 00:01:15 +03:00
break ;
2017-03-17 09:18:50 +03:00
2019-07-09 19:56:43 +03:00
/* Restore the full size, then step the bio past the fragment just issued. */
swap ( rbio - > bio . bi_iter . bi_size , bytes ) ;
bio_advance ( & rbio - > bio , bytes ) ;
2021-11-24 03:00:23 +03:00
ret = btree_trans_too_many_iters ( trans ) ;
if ( ret )
break ;
2017-03-17 09:18:50 +03:00
}
2021-03-13 04:30:39 +03:00
err :
bch2_trans_iter_exit ( trans , & iter ) ;
2019-08-16 16:59:56 +03:00
2022-07-18 06:06:38 +03:00
/* Transaction restarts are not errors: start the whole lookup again. */
if ( bch2_err_matches ( ret , BCH_ERR_transaction_restart ) )
2019-08-16 16:59:56 +03:00
goto retry ;
2019-11-10 00:01:15 +03:00
if ( ret ) {
2022-11-16 04:25:08 +03:00
bch_err_inum_offset_ratelimited ( c ,
iter . pos . inode ,
iter . pos . offset < < 9 ,
2020-12-03 21:57:22 +03:00
" read error %i from btree lookup " , ret ) ;
/* No read was issued for the remainder, so complete the bio here. */
rbio - > bio . bi_status = BLK_STS_IOERR ;
2019-11-10 00:01:15 +03:00
bio_endio ( & rbio - > bio ) ;
}
2020-12-17 23:08:58 +03:00
bch2_bkey_buf_exit ( & sk , c ) ;
2017-03-17 09:18:50 +03:00
}
void bch2_readahead ( struct readahead_control * ractl )
{
struct bch_inode_info * inode = to_bch_ei ( ractl - > mapping - > host ) ;
struct bch_fs * c = inode - > v . i_sb - > s_fs_info ;
2022-11-24 04:14:55 +03:00
struct bch_io_opts opts ;
2019-03-25 22:10:15 +03:00
struct btree_trans trans ;
2017-03-17 09:18:50 +03:00
struct page * page ;
struct readpages_iter readpages_iter ;
int ret ;
2022-11-24 04:14:55 +03:00
bch2_inode_opts_get ( & opts , c , & inode - > ei_inode ) ;
2017-03-17 09:18:50 +03:00
ret = readpages_iter_init ( & readpages_iter , ractl ) ;
BUG_ON ( ret ) ;
2019-05-15 17:54:43 +03:00
bch2_trans_init ( & trans , c , 0 , 0 ) ;
2017-03-17 09:18:50 +03:00
2022-11-04 20:25:57 +03:00
bch2_pagecache_add_get ( inode ) ;
2017-03-17 09:18:50 +03:00
while ( ( page = readpage_iter_next ( & readpages_iter ) ) ) {
pgoff_t index = readpages_iter . offset + readpages_iter . idx ;
unsigned n = min_t ( unsigned ,
readpages_iter . nr_pages -
readpages_iter . idx ,
BIO_MAX_VECS ) ;
struct bch_read_bio * rbio =
rbio_init ( bio_alloc_bioset ( NULL , n , REQ_OP_READ ,
GFP_NOFS , & c - > bio_read ) ,
opts ) ;
readpages_iter . idx + + ;
2021-11-24 04:00:34 +03:00
rbio - > bio . bi_iter . bi_sector = ( sector_t ) index < < PAGE_SECTORS_SHIFT ;
2017-03-17 09:18:50 +03:00
rbio - > bio . bi_end_io = bch2_readpages_end_io ;
2019-07-04 02:27:42 +03:00
BUG_ON ( ! bio_add_page ( & rbio - > bio , page , PAGE_SIZE , 0 ) ) ;
2017-03-17 09:18:50 +03:00
2021-03-13 04:30:39 +03:00
bchfs_read ( & trans , rbio , inode_inum ( inode ) ,
2019-03-28 05:03:30 +03:00
& readpages_iter ) ;
2017-03-17 09:18:50 +03:00
}
2022-11-04 20:25:57 +03:00
bch2_pagecache_add_put ( inode ) ;
2019-03-25 22:10:15 +03:00
bch2_trans_exit ( & trans ) ;
2017-03-17 09:18:50 +03:00
kfree ( readpages_iter . pages ) ;
}
static void __bchfs_readpage ( struct bch_fs * c , struct bch_read_bio * rbio ,
2021-03-13 04:30:39 +03:00
subvol_inum inum , struct page * page )
2017-03-17 09:18:50 +03:00
{
2019-03-25 22:10:15 +03:00
struct btree_trans trans ;
2017-03-17 09:18:50 +03:00
2019-07-03 04:41:35 +03:00
bch2_page_state_create ( page , __GFP_NOFAIL ) ;
2017-03-17 09:18:50 +03:00
rbio - > bio . bi_opf = REQ_OP_READ | REQ_SYNC ;
2019-07-29 19:24:36 +03:00
rbio - > bio . bi_iter . bi_sector =
2021-11-24 04:00:34 +03:00
( sector_t ) page - > index < < PAGE_SECTORS_SHIFT ;
2019-07-29 19:24:36 +03:00
BUG_ON ( ! bio_add_page ( & rbio - > bio , page , PAGE_SIZE , 0 ) ) ;
2017-03-17 09:18:50 +03:00
2019-05-15 17:54:43 +03:00
bch2_trans_init ( & trans , c , 0 , 0 ) ;
2021-03-13 04:30:39 +03:00
bchfs_read ( & trans , rbio , inum , NULL ) ;
2019-03-25 22:10:15 +03:00
bch2_trans_exit ( & trans ) ;
2017-03-17 09:18:50 +03:00
}
static void bch2_read_single_page_end_io ( struct bio * bio )
{
complete ( bio - > bi_private ) ;
}
static int bch2_read_single_page ( struct page * page ,
struct address_space * mapping )
{
struct bch_inode_info * inode = to_bch_ei ( mapping - > host ) ;
struct bch_fs * c = inode - > v . i_sb - > s_fs_info ;
struct bch_read_bio * rbio ;
2022-11-24 04:14:55 +03:00
struct bch_io_opts opts ;
2017-03-17 09:18:50 +03:00
int ret ;
DECLARE_COMPLETION_ONSTACK ( done ) ;
2022-11-24 04:14:55 +03:00
bch2_inode_opts_get ( & opts , c , & inode - > ei_inode ) ;
2017-03-17 09:18:50 +03:00
rbio = rbio_init ( bio_alloc_bioset ( NULL , 1 , REQ_OP_READ , GFP_NOFS , & c - > bio_read ) ,
2022-11-24 04:14:55 +03:00
opts ) ;
2017-03-17 09:18:50 +03:00
rbio - > bio . bi_private = & done ;
rbio - > bio . bi_end_io = bch2_read_single_page_end_io ;
2021-03-13 04:30:39 +03:00
__bchfs_readpage ( c , rbio , inode_inum ( inode ) , page ) ;
2017-03-17 09:18:50 +03:00
wait_for_completion ( & done ) ;
ret = blk_status_to_errno ( rbio - > bio . bi_status ) ;
bio_put ( & rbio - > bio ) ;
if ( ret < 0 )
return ret ;
SetPageUptodate ( page ) ;
return 0 ;
}
int bch2_read_folio ( struct file * file , struct folio * folio )
{
struct page * page = & folio - > page ;
int ret ;
ret = bch2_read_single_page ( page , page - > mapping ) ;
folio_unlock ( folio ) ;
2022-09-18 22:43:50 +03:00
return bch2_err_class ( ret ) ;
2017-03-17 09:18:50 +03:00
}
/* writepages: */
/* Per-writeback-pass state threaded through __bch2_writepage(). */
struct bch_writepage_state {
	/* Write currently being assembled, or NULL when none is pending. */
	struct bch_writepage_io	*io;
	/* IO options for the inode, filled in by bch_writepage_state_init(). */
	struct bch_io_opts	opts;
};
static inline struct bch_writepage_state bch_writepage_state_init ( struct bch_fs * c ,
struct bch_inode_info * inode )
{
2022-11-24 04:14:55 +03:00
struct bch_writepage_state ret = { 0 } ;
bch2_inode_opts_get ( & ret . opts , c , & inode - > ei_inode ) ;
return ret ;
2017-03-17 09:18:50 +03:00
}
2022-10-29 09:47:33 +03:00
static void bch2_writepage_io_done ( struct bch_write_op * op )
2017-03-17 09:18:50 +03:00
{
2022-10-29 09:47:33 +03:00
struct bch_writepage_io * io =
container_of ( op , struct bch_writepage_io , op ) ;
2019-10-09 19:50:39 +03:00
struct bch_fs * c = io - > op . c ;
struct bio * bio = & io - > op . wbio . bio ;
2017-03-17 09:18:50 +03:00
struct bvec_iter_all iter ;
struct bio_vec * bvec ;
2019-08-13 10:16:52 +03:00
unsigned i ;
2017-03-17 09:18:50 +03:00
2019-10-09 19:50:39 +03:00
if ( io - > op . error ) {
2020-12-03 22:27:20 +03:00
set_bit ( EI_INODE_ERROR , & io - > inode - > ei_flags ) ;
2019-04-18 03:34:24 +03:00
bio_for_each_segment_all ( bvec , bio , iter ) {
2019-08-13 10:16:52 +03:00
struct bch_page_state * s ;
2017-03-17 09:18:50 +03:00
SetPageError ( bvec - > bv_page ) ;
2019-04-18 03:34:24 +03:00
mapping_set_error ( bvec - > bv_page - > mapping , - EIO ) ;
2019-08-13 10:16:52 +03:00
2019-10-09 16:19:06 +03:00
s = __bch2_page_state ( bvec - > bv_page ) ;
spin_lock ( & s - > lock ) ;
2019-08-13 10:16:52 +03:00
for ( i = 0 ; i < PAGE_SECTORS ; i + + )
s - > s [ i ] . nr_replicas = 0 ;
2019-10-09 16:19:06 +03:00
spin_unlock ( & s - > lock ) ;
2019-04-18 03:34:24 +03:00
}
2017-03-17 09:18:50 +03:00
}
2019-11-10 00:43:16 +03:00
if ( io - > op . flags & BCH_WRITE_WROTE_DATA_INLINE ) {
bio_for_each_segment_all ( bvec , bio , iter ) {
struct bch_page_state * s ;
s = __bch2_page_state ( bvec - > bv_page ) ;
spin_lock ( & s - > lock ) ;
for ( i = 0 ; i < PAGE_SECTORS ; i + + )
s - > s [ i ] . nr_replicas = 0 ;
spin_unlock ( & s - > lock ) ;
}
}
2017-03-17 09:18:50 +03:00
/*
* racing with fallocate can cause us to add fewer sectors than
* expected - but we shouldn ' t add more sectors than expected :
*/
2022-03-17 03:31:15 +03:00
WARN_ON_ONCE ( io - > op . i_sectors_delta > 0 ) ;
2017-03-17 09:18:50 +03:00
/*
* ( error ( due to going RO ) halfway through a page can screw that up
* slightly )
* XXX wtf ?
2019-10-09 19:50:39 +03:00
BUG_ON ( io - > op . op . i_sectors_delta > = PAGE_SECTORS ) ;
2017-03-17 09:18:50 +03:00
*/
/*
* PageWriteback is effectively our ref on the inode - fixup i_blocks
* before calling end_page_writeback :
*/
2019-10-09 19:50:39 +03:00
i_sectors_acct ( c , io - > inode , NULL , io - > op . i_sectors_delta ) ;
2017-03-17 09:18:50 +03:00
2019-07-29 19:24:36 +03:00
bio_for_each_segment_all ( bvec , bio , iter ) {
struct bch_page_state * s = __bch2_page_state ( bvec - > bv_page ) ;
if ( atomic_dec_and_test ( & s - > write_count ) )
end_page_writeback ( bvec - > bv_page ) ;
}
2017-03-17 09:18:50 +03:00
2022-10-29 09:47:33 +03:00
bio_put ( & io - > op . wbio . bio ) ;
2017-03-17 09:18:50 +03:00
}
static void bch2_writepage_do_io ( struct bch_writepage_state * w )
{
struct bch_writepage_io * io = w - > io ;
w - > io = NULL ;
2022-10-29 09:47:33 +03:00
closure_call ( & io - > op . cl , bch2_write , NULL , NULL ) ;
2017-03-17 09:18:50 +03:00
}
/*
* Get a bch_writepage_io and add @ page to it - appending to an existing one if
* possible , else allocating a new one :
*/
static void bch2_writepage_io_alloc ( struct bch_fs * c ,
2019-11-14 03:45:48 +03:00
struct writeback_control * wbc ,
2017-03-17 09:18:50 +03:00
struct bch_writepage_state * w ,
struct bch_inode_info * inode ,
2019-07-29 19:24:36 +03:00
u64 sector ,
2017-03-17 09:18:50 +03:00
unsigned nr_replicas )
{
struct bch_write_op * op ;
w - > io = container_of ( bio_alloc_bioset ( NULL , BIO_MAX_VECS ,
REQ_OP_WRITE ,
GFP_NOFS ,
& c - > writepage_bioset ) ,
2019-10-09 19:50:39 +03:00
struct bch_writepage_io , op . wbio . bio ) ;
2017-03-17 09:18:50 +03:00
2019-10-09 19:50:39 +03:00
w - > io - > inode = inode ;
op = & w - > io - > op ;
bch2_write_op_init ( op , c , w - > opts ) ;
op - > target = w - > opts . foreground_target ;
2017-03-17 09:18:50 +03:00
op - > nr_replicas = nr_replicas ;
op - > res . nr_replicas = nr_replicas ;
op - > write_point = writepoint_hashed ( inode - > ei_last_dirtied ) ;
2021-03-13 04:30:39 +03:00
op - > subvol = inode - > ei_subvol ;
2019-07-29 19:24:36 +03:00
op - > pos = POS ( inode - > v . i_ino , sector ) ;
2022-10-29 09:47:33 +03:00
op - > end_io = bch2_writepage_io_done ;
2019-07-29 19:24:36 +03:00
op - > wbio . bio . bi_iter . bi_sector = sector ;
2019-11-14 03:45:48 +03:00
op - > wbio . bio . bi_opf = wbc_to_write_flags ( wbc ) ;
2017-03-17 09:18:50 +03:00
}
/*
 * __bch2_writepage - per-folio callback handed to write_cache_pages().
 *
 * @folio: locked folio to write back; unlocked before returning.
 * @wbc:   writeback control, used for the bio flags of newly allocated ios.
 * @data:  the struct bch_writepage_state for this writeback pass.
 *
 * Pages entirely beyond i_size are skipped.  Otherwise the page's dirty
 * sectors are switched to SECTOR_ALLOCATED, the page is put under writeback,
 * and each contiguous run of sectors that was dirty is appended to the
 * pending write in w->io (submitting and reallocating it when the run cannot
 * be appended).  Always returns 0.
 */
static int __bch2_writepage ( struct folio * folio ,
struct writeback_control * wbc ,
void * data )
{
struct page * page = & folio - > page ;
struct bch_inode_info * inode = to_bch_ei ( page - > mapping - > host ) ;
struct bch_fs * c = inode - > v . i_sb - > s_fs_info ;
struct bch_writepage_state * w = data ;
2019-07-29 19:24:36 +03:00
struct bch_page_state * s , orig ;
unsigned i , offset , nr_replicas_this_write = U32_MAX ;
2017-03-17 09:18:50 +03:00
loff_t i_size = i_size_read ( & inode - > v ) ;
pgoff_t end_index = i_size > > PAGE_SHIFT ;
2019-07-02 21:59:15 +03:00
int ret ;
2017-03-17 09:18:50 +03:00
EBUG_ON ( ! PageUptodate ( page ) ) ;
/* Is the page fully inside i_size? */
if ( page - > index < end_index )
goto do_io ;
/* Is the page fully outside i_size? (truncate in progress) */
offset = i_size & ( PAGE_SIZE - 1 ) ;
if ( page - > index > end_index | | ! offset ) {
unlock_page ( page ) ;
return 0 ;
}
/*
 * The page straddles i_size. It must be zeroed out on each and every
 * writepage invocation because it may be mmapped. "A file is mapped
 * in multiples of the page size. For a file that is not a multiple of
 * the page size, the remaining memory is zeroed when mapped, and
 * writes to that region are not written out to the file."
 */
zero_user_segment ( page , offset , PAGE_SIZE ) ;
do_io :
2019-07-03 04:41:35 +03:00
s = bch2_page_state_create ( page , __GFP_NOFAIL ) ;
2018-11-15 05:53:40 +03:00
2021-11-11 21:02:03 +03:00
/*
 * Things get really hairy with errors during writeback:
 */
ret = bch2_get_page_disk_reservation ( c , inode , page , false ) ;
BUG_ON ( ret ) ;
2018-11-15 05:53:40 +03:00
2019-07-29 19:24:36 +03:00
/* Before unlocking the page, get copy of reservations: */
2021-11-11 21:02:03 +03:00
spin_lock ( & s - > lock ) ;
2019-07-29 19:24:36 +03:00
orig = * s ;
2021-11-11 21:02:03 +03:00
spin_unlock ( & s - > lock ) ;
2019-07-29 19:24:36 +03:00
/*
 * The write uses the smallest (nr_replicas + replicas_reserved) found
 * across the sectors that are at least SECTOR_DIRTY.
 */
for ( i = 0 ; i < PAGE_SECTORS ; i + + ) {
2019-07-30 21:18:29 +03:00
if ( s - > s [ i ] . state < SECTOR_DIRTY )
2019-07-29 19:24:36 +03:00
continue ;
2019-07-03 04:41:35 +03:00
nr_replicas_this_write =
min_t ( unsigned , nr_replicas_this_write ,
s - > s [ i ] . nr_replicas +
s - > s [ i ] . replicas_reserved ) ;
2019-07-29 19:24:36 +03:00
}
2019-07-02 21:59:15 +03:00
2019-07-03 04:41:35 +03:00
/*
 * Update the live page state for the sectors being written: they become
 * SECTOR_ALLOCATED with no reservation.  nr_replicas is recorded as 0
 * when the compression option is set.  The pre-update values remain
 * available in orig for the loop below.
 */
for ( i = 0 ; i < PAGE_SECTORS ; i + + ) {
2019-07-30 21:18:29 +03:00
if ( s - > s [ i ] . state < SECTOR_DIRTY )
2019-07-29 19:24:36 +03:00
continue ;
2019-07-03 04:41:35 +03:00
s - > s [ i ] . nr_replicas = w - > opts . compression
? 0 : nr_replicas_this_write ;
2019-07-02 21:59:15 +03:00
2019-07-03 04:41:35 +03:00
s - > s [ i ] . replicas_reserved = 0 ;
s - > s [ i ] . state = SECTOR_ALLOCATED ;
}
2017-03-17 09:18:50 +03:00
2019-07-29 19:24:36 +03:00
/*
 * write_count starts at 1: a reference held by this function while it
 * adds the page to bios.  Each bio_add_page() below takes one more, and
 * bch2_writepage_io_done() drops one per bio segment; whoever drops the
 * last reference ends page writeback.
 */
BUG_ON ( atomic_read ( & s - > write_count ) ) ;
atomic_set ( & s - > write_count , 1 ) ;
2017-03-17 09:18:50 +03:00
BUG_ON ( PageWriteback ( page ) ) ;
set_page_writeback ( page ) ;
2019-07-29 19:24:36 +03:00
2017-03-17 09:18:50 +03:00
unlock_page ( page ) ;
2019-07-29 19:24:36 +03:00
/* From here on offset counts sectors within the page, not bytes. */
offset = 0 ;
while ( 1 ) {
2021-11-11 21:02:03 +03:00
unsigned sectors = 0 , dirty_sectors = 0 , reserved_sectors = 0 ;
2019-07-29 19:24:36 +03:00
u64 sector ;
/* Skip sectors that were not dirty in the snapshot. */
while ( offset < PAGE_SECTORS & &
2019-07-30 21:18:29 +03:00
orig . s [ offset ] . state < SECTOR_DIRTY )
2019-07-29 19:24:36 +03:00
offset + + ;
2017-03-17 09:18:50 +03:00
2019-07-29 19:24:36 +03:00
if ( offset = = PAGE_SECTORS )
break ;
/* Measure the contiguous run of sectors to write starting at offset. */
while ( offset + sectors < PAGE_SECTORS & &
2021-11-11 21:02:03 +03:00
orig . s [ offset + sectors ] . state > = SECTOR_DIRTY ) {
reserved_sectors + = orig . s [ offset + sectors ] . replicas_reserved ;
dirty_sectors + = orig . s [ offset + sectors ] . state = = SECTOR_DIRTY ;
2019-07-29 19:24:36 +03:00
sectors + + ;
}
2021-11-11 21:02:03 +03:00
BUG_ON ( ! sectors ) ;
2021-11-24 04:00:34 +03:00
sector = ( ( u64 ) page - > index < < PAGE_SECTORS_SHIFT ) + offset ;
2019-07-29 19:24:36 +03:00
/*
 * Submit the pending io first if this run cannot be appended to it:
 * different replica count, bio full or about to exceed its maximum
 * size, or the run is not contiguous with the end of the bio.
 */
if ( w - > io & &
2019-10-09 19:50:39 +03:00
( w - > io - > op . res . nr_replicas ! = nr_replicas_this_write | |
bio_full ( & w - > io - > op . wbio . bio , PAGE_SIZE ) | |
2020-04-29 22:28:25 +03:00
w - > io - > op . wbio . bio . bi_iter . bi_size + ( sectors < < 9 ) > =
( BIO_MAX_VECS * PAGE_SIZE ) | |
2019-10-09 19:50:39 +03:00
bio_end_sector ( & w - > io - > op . wbio . bio ) ! = sector ) )
2019-07-29 19:24:36 +03:00
bch2_writepage_do_io ( w ) ;
2017-03-17 09:18:50 +03:00
2019-07-29 19:24:36 +03:00
if ( ! w - > io )
2019-11-14 03:45:48 +03:00
bch2_writepage_io_alloc ( c , wbc , w , inode , sector ,
2019-07-29 19:24:36 +03:00
nr_replicas_this_write ) ;
2017-03-17 09:18:50 +03:00
2019-07-29 19:24:36 +03:00
atomic_inc ( & s - > write_count ) ;
2019-10-09 19:50:39 +03:00
BUG_ON ( inode ! = w - > io - > inode ) ;
BUG_ON ( ! bio_add_page ( & w - > io - > op . wbio . bio , page ,
2019-07-29 19:24:36 +03:00
sectors < < 9 , offset < < 9 ) ) ;
2019-09-20 01:05:04 +03:00
/* Check for writing past i_size: */
2022-11-15 23:57:07 +03:00
WARN_ONCE ( ( bio_end_sector ( & w - > io - > op . wbio . bio ) < < 9 ) >
round_up ( i_size , block_bytes ( c ) ) & &
! test_bit ( BCH_FS_EMERGENCY_RO , & c - > flags ) ,
" writing past i_size: %llu > %llu (unrounded %llu) \n " ,
bio_end_sector ( & w - > io - > op . wbio . bio ) < < 9 ,
round_up ( i_size , block_bytes ( c ) ) ,
i_size ) ;
2019-09-20 01:05:04 +03:00
2019-10-09 19:50:39 +03:00
/* Hand this run's reservation and dirty-sector count over to the write op. */
w - > io - > op . res . sectors + = reserved_sectors ;
w - > io - > op . i_sectors_delta - = dirty_sectors ;
2019-07-29 19:24:36 +03:00
w - > io - > op . new_i_size = i_size ;
offset + = sectors ;
}
2017-03-17 09:18:50 +03:00
2019-07-29 19:24:36 +03:00
/* Drop the initial write_count reference taken above. */
if ( atomic_dec_and_test ( & s - > write_count ) )
end_page_writeback ( page ) ;
2017-03-17 09:18:50 +03:00
return 0 ;
}
int bch2_writepages ( struct address_space * mapping , struct writeback_control * wbc )
{
struct bch_fs * c = mapping - > host - > i_sb - > s_fs_info ;
struct bch_writepage_state w =
bch_writepage_state_init ( c , to_bch_ei ( mapping - > host ) ) ;
struct blk_plug plug ;
int ret ;
blk_start_plug ( & plug ) ;
ret = write_cache_pages ( mapping , wbc , __bch2_writepage , & w ) ;
if ( w . io )
bch2_writepage_do_io ( & w ) ;
blk_finish_plug ( & plug ) ;
2022-09-18 22:43:50 +03:00
return bch2_err_class ( ret ) ;
2017-03-17 09:18:50 +03:00
}
/* buffered writes: */
/*
 * ->write_begin: get the page covering @pos locked and ready to receive @len
 * bytes, with a disk reservation for the range being written.
 *
 * On success returns 0 with *pagep set to the locked page and *fsdata set to
 * the allocated page reservation; the pagecache_add reference is still held.
 * On failure the page, the pagecache_add reference and the reservation are
 * all released, *fsdata is cleared, and a negative error is returned.
 */
int bch2_write_begin(struct file *file, struct address_space *mapping,
		     loff_t pos, unsigned len,
		     struct page **pagep, void **fsdata)
{
	struct bch_inode_info *inode = to_bch_ei(mapping->host);
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	pgoff_t pg_index = pos >> PAGE_SHIFT;
	unsigned pg_offset = pos & (PAGE_SIZE - 1);
	struct bch2_page_reservation *res;
	struct page *page;
	int ret = -ENOMEM;

	res = kmalloc(sizeof(*res), GFP_KERNEL);
	if (!res)
		return -ENOMEM;

	bch2_page_reservation_init(c, inode, res);
	*fsdata = res;

	bch2_pagecache_add_get(inode);

	page = grab_cache_page_write_begin(mapping, pg_index);
	if (!page)
		goto err_unlock;

	/*
	 * Nothing to read if the page is already uptodate, or if the whole
	 * page is about to be overwritten:
	 */
	if (PageUptodate(page) || len == PAGE_SIZE)
		goto out;

	/* Write starts at the page boundary and reaches i_size: zero the tail. */
	if (!pg_offset && pos + len >= inode->v.i_size) {
		zero_user_segment(page, len, PAGE_SIZE);
		flush_dcache_page(page);
		goto out;
	}

	/* Page index is past the page containing i_size: zero around the write. */
	if (pg_index > inode->v.i_size >> PAGE_SHIFT) {
		zero_user_segments(page, 0, pg_offset, pg_offset + len, PAGE_SIZE);
		flush_dcache_page(page);
		goto out;
	}
readpage:
	ret = bch2_read_single_page(page, mapping);
	if (ret)
		goto err;
out:
	if (!bch2_page_state_create(page, __GFP_NOFAIL)->uptodate) {
		ret = bch2_page_state_set(c, inode_inum(inode), &page, 1);
		if (ret)
			goto err;
	}

	ret = bch2_page_reservation_get(c, inode, page, res, pg_offset, len);
	if (!ret) {
		*pagep = page;
		return 0;
	}

	/*
	 * Without the page read in we can't tell whether a reservation is
	 * really needed.  Strictly we only need to know whether the page is
	 * fully backed by uncompressed data, not its contents, but reading it
	 * is how we find out:
	 */
	if (!PageUptodate(page))
		goto readpage;

	/* Reservation failed on an uptodate page: fall through to the error path. */
err:
	unlock_page(page);
	put_page(page);
	*pagep = NULL;
err_unlock:
	bch2_pagecache_add_put(inode);
	kfree(res);
	*fsdata = NULL;
	return bch2_err_class(ret);
}
/*
 * ->write_end: finish a write started by bch2_write_begin().
 *
 * Extends i_size if the copy went past it, dirties the copied range, then
 * releases the page, the pagecache_add reference and the reservation passed
 * in @fsdata.  Returns the number of bytes accepted, which is 0 when a short
 * copy landed in a page that was not uptodate.
 */
int bch2_write_end(struct file *file, struct address_space *mapping,
		   loff_t pos, unsigned len, unsigned copied,
		   struct page *page, void *fsdata)
{
	struct bch_inode_info *inode = to_bch_ei(mapping->host);
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	struct bch2_page_reservation *res = fsdata;
	unsigned pg_offset = pos & (PAGE_SIZE - 1);
	loff_t new_end;

	lockdep_assert_held(&inode->v.i_rwsem);

	if (unlikely(copied < len && !PageUptodate(page))) {
		/*
		 * The page would have to be read in, but that would wipe out
		 * the partial write.  Simplest is to discard it and make
		 * userspace redo the write:
		 */
		zero_user(page, 0, PAGE_SIZE);
		flush_dcache_page(page);
		copied = 0;
	}

	new_end = pos + copied;

	spin_lock(&inode->v.i_lock);
	if (new_end > inode->v.i_size)
		i_size_write(&inode->v, new_end);
	spin_unlock(&inode->v.i_lock);

	if (copied) {
		if (!PageUptodate(page))
			SetPageUptodate(page);

		bch2_set_page_dirty(c, inode, page, res, pg_offset, copied);

		inode->ei_last_dirtied = (unsigned long) current;
	}

	unlock_page(page);
	put_page(page);
	bch2_pagecache_add_put(inode);

	bch2_page_reservation_put(c, inode, res);
	kfree(res);

	return copied;
}
# define WRITE_BATCH_PAGES 32
/*
 * __bch2_buffered_write - copy up to @len bytes from @iter into the page
 * cache at @pos, covering at most WRITE_BATCH_PAGES pages.
 *
 * @inode:   inode being written.
 * @mapping: its address space.
 * @iter:    source of the data; advanced by exactly the number of bytes
 *           reported as written.
 * @pos:     file offset of the write.
 * @len:     number of bytes to attempt; must be non-zero and must not span
 *           more than WRITE_BATCH_PAGES pages.
 *
 * Steps: grab and lock the pages, read in (or zero) partially overwritten
 * first/last pages, take disk reservations, copy from @iter with the atomic
 * copy helper, update i_size, then dirty and release the pages that
 * received data.
 *
 * Returns the number of bytes copied if any were copied; otherwise 0 or a
 * negative error code.
 */
static int __bch2_buffered_write(struct bch_inode_info *inode,
				 struct address_space *mapping,
				 struct iov_iter *iter,
				 loff_t pos, unsigned len)
{
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	struct page *pages[WRITE_BATCH_PAGES];
	struct bch2_page_reservation res;
	unsigned long index = pos >> PAGE_SHIFT;
	unsigned offset = pos & (PAGE_SIZE - 1);
	unsigned nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
	unsigned i, reserved = 0, set_dirty = 0;
	unsigned copied = 0, nr_pages_copied = 0;
	int ret = 0;

	BUG_ON(!len);
	BUG_ON(nr_pages > ARRAY_SIZE(pages));

	bch2_page_reservation_init(c, inode, &res);

	for (i = 0; i < nr_pages; i++) {
		pages[i] = grab_cache_page_write_begin(mapping, index + i);
		if (!pages[i]) {
			/* Carry on with the pages we did get, if any. */
			nr_pages = i;
			if (!i) {
				ret = -ENOMEM;
				goto out;
			}

			len = min_t(unsigned, len,
				    nr_pages * PAGE_SIZE - offset);
			break;
		}
	}

	/* Write starts mid-page: the head of the first page must be valid. */
	if (offset && !PageUptodate(pages[0])) {
		ret = bch2_read_single_page(pages[0], mapping);
		if (ret)
			goto out;
	}

	/* Write ends mid-page: the tail of the last page must be valid. */
	if ((pos + len) & (PAGE_SIZE - 1) &&
	    !PageUptodate(pages[nr_pages - 1])) {
		/*
		 * Widen to loff_t before shifting: the page index is an
		 * unsigned long, so on 32-bit the byte offset would wrap for
		 * positions at or beyond 4GiB.
		 */
		loff_t last_page_start =
			(loff_t) (index + nr_pages - 1) << PAGE_SHIFT;

		if (last_page_start >= inode->v.i_size) {
			zero_user(pages[nr_pages - 1], 0, PAGE_SIZE);
		} else {
			ret = bch2_read_single_page(pages[nr_pages - 1], mapping);
			if (ret)
				goto out;
		}
	}

	while (reserved < len) {
		unsigned pg_idx = (offset + reserved) >> PAGE_SHIFT;
		struct page *page = pages[pg_idx];
		unsigned pg_offset = (offset + reserved) & (PAGE_SIZE - 1);
		unsigned pg_len = min_t(unsigned, len - reserved,
					PAGE_SIZE - pg_offset);

		if (!bch2_page_state_create(page, __GFP_NOFAIL)->uptodate) {
			ret = bch2_page_state_set(c, inode_inum(inode),
						  pages + pg_idx,
						  nr_pages - pg_idx);
			if (ret)
				goto out;
		}

		/*
		 * XXX: per POSIX and fstests generic/275, on -ENOSPC we're
		 * supposed to write as much as we have disk space for.
		 *
		 * On failure here we should still write out a partial page if
		 * we aren't completely out of disk space - we don't do that
		 * yet:
		 */
		ret = bch2_page_reservation_get(c, inode, page, &res,
						pg_offset, pg_len);
		if (unlikely(ret)) {
			if (!reserved)
				goto out;
			break;
		}

		reserved += pg_len;
	}

	if (mapping_writably_mapped(mapping))
		for (i = 0; i < nr_pages; i++)
			flush_dcache_page(pages[i]);

	while (copied < reserved) {
		struct page *page = pages[(offset + copied) >> PAGE_SHIFT];
		unsigned pg_offset = (offset + copied) & (PAGE_SIZE - 1);
		unsigned pg_len = min_t(unsigned, reserved - copied,
					PAGE_SIZE - pg_offset);
		unsigned pg_copied = copy_page_from_iter_atomic(page,
						pg_offset, pg_len, iter);

		if (!pg_copied)
			break;

		if (!PageUptodate(page) &&
		    pg_copied != PAGE_SIZE &&
		    pos + copied + pg_copied < inode->v.i_size) {
			/*
			 * A short copy into a page that isn't uptodate can't
			 * be kept.  The bytes are not counted in copied, so
			 * rewind the iterator as well; otherwise the caller
			 * would resume past them and they would never be
			 * written.
			 */
			iov_iter_revert(iter, pg_copied);
			zero_user(page, 0, PAGE_SIZE);
			break;
		}

		flush_dcache_page(page);
		copied += pg_copied;

		if (pg_copied != pg_len)
			break;
	}

	if (!copied)
		goto out;

	spin_lock(&inode->v.i_lock);
	if (pos + copied > inode->v.i_size)
		i_size_write(&inode->v, pos + copied);
	spin_unlock(&inode->v.i_lock);

	/* Dirty and release every page that received data. */
	while (set_dirty < copied) {
		struct page *page = pages[(offset + set_dirty) >> PAGE_SHIFT];
		unsigned pg_offset = (offset + set_dirty) & (PAGE_SIZE - 1);
		unsigned pg_len = min_t(unsigned, copied - set_dirty,
					PAGE_SIZE - pg_offset);

		if (!PageUptodate(page))
			SetPageUptodate(page);

		bch2_set_page_dirty(c, inode, page, &res, pg_offset, pg_len);
		unlock_page(page);
		put_page(page);

		set_dirty += pg_len;
	}

	nr_pages_copied = DIV_ROUND_UP(offset + copied, PAGE_SIZE);
	inode->ei_last_dirtied = (unsigned long) current;
out:
	/* Release the pages not already released by the dirtying loop above. */
	for (i = nr_pages_copied; i < nr_pages; i++) {
		unlock_page(pages[i]);
		put_page(pages[i]);
	}

	bch2_page_reservation_put(c, inode, &res);

	return copied ?: ret;
}
static ssize_t bch2_buffered_write ( struct kiocb * iocb , struct iov_iter * iter )
{
struct file * file = iocb - > ki_filp ;
struct address_space * mapping = file - > f_mapping ;
struct bch_inode_info * inode = file_bch_inode ( file ) ;
loff_t pos = iocb - > ki_pos ;
ssize_t written = 0 ;
int ret = 0 ;
2022-11-04 20:25:57 +03:00
bch2_pagecache_add_get ( inode ) ;
2017-03-17 09:18:50 +03:00
do {
unsigned offset = pos & ( PAGE_SIZE - 1 ) ;
unsigned bytes = min_t ( unsigned long , iov_iter_count ( iter ) ,
PAGE_SIZE * WRITE_BATCH_PAGES - offset ) ;
again :
/*
* Bring in the user page that we will copy from _first_ .
* Otherwise there ' s a nasty deadlock on copying from the
* same page as we ' re writing to , without it being marked
* up - to - date .
*
* Not only is this an optimisation , but it is also required
* to check that the address is actually valid , when atomic
* usercopies are used , below .
*/
if ( unlikely ( fault_in_iov_iter_readable ( iter , bytes ) ) ) {
bytes = min_t ( unsigned long , iov_iter_count ( iter ) ,
PAGE_SIZE - offset ) ;
if ( unlikely ( fault_in_iov_iter_readable ( iter , bytes ) ) ) {
ret = - EFAULT ;
break ;
}
}
if ( unlikely ( fatal_signal_pending ( current ) ) ) {
ret = - EINTR ;
break ;
}
ret = __bch2_buffered_write ( inode , mapping , iter , pos , bytes ) ;
if ( unlikely ( ret < 0 ) )
break ;
cond_resched ( ) ;
if ( unlikely ( ret = = 0 ) ) {
/*
* If we were unable to copy any data at all , we must
* fall back to a single segment length write .
*
* If we didn ' t fallback here , we could livelock
* because not all segments in the iov can be copied at
* once without a pagefault .
*/
bytes = min_t ( unsigned long , PAGE_SIZE - offset ,
iov_iter_single_seg_count ( iter ) ) ;
goto again ;
}
pos + = ret ;
written + = ret ;
2020-07-09 20:54:58 +03:00
ret = 0 ;
2017-03-17 09:18:50 +03:00
balance_dirty_pages_ratelimited ( mapping ) ;
} while ( iov_iter_count ( iter ) ) ;
2022-11-04 20:25:57 +03:00
bch2_pagecache_add_put ( inode ) ;
2017-03-17 09:18:50 +03:00
return written ? written : ret ;
}
/* O_DIRECT reads */
2021-01-21 22:42:23 +03:00
/*
 * Finish with a direct-IO read bio: either hand it to
 * bio_check_pages_dirty(), or release its pages without dirtying them and
 * drop the bio.
 */
static void bio_check_or_release(struct bio *bio, bool check_dirty)
{
	if (check_dirty) {
		bio_check_pages_dirty(bio);
		return;
	}

	bio_release_pages(bio, false);
	bio_put(bio);
}
2017-03-17 09:18:50 +03:00
static void bch2_dio_read_complete ( struct closure * cl )
{
struct dio_read * dio = container_of ( cl , struct dio_read , cl ) ;
dio - > req - > ki_complete ( dio - > req , dio - > ret ) ;
2021-01-21 22:42:23 +03:00
bio_check_or_release ( & dio - > rbio . bio , dio - > should_dirty ) ;
2017-03-17 09:18:50 +03:00
}
static void bch2_direct_IO_read_endio ( struct bio * bio )
{
struct dio_read * dio = bio - > bi_private ;
if ( bio - > bi_status )
dio - > ret = blk_status_to_errno ( bio - > bi_status ) ;
closure_put ( & dio - > cl ) ;
}
static void bch2_direct_IO_read_split_endio ( struct bio * bio )
{
2021-01-21 22:42:23 +03:00
struct dio_read * dio = bio - > bi_private ;
bool should_dirty = dio - > should_dirty ;
2017-03-17 09:18:50 +03:00
bch2_direct_IO_read_endio ( bio ) ;
2021-01-21 22:42:23 +03:00
bio_check_or_release ( bio , should_dirty ) ;
2017-03-17 09:18:50 +03:00
}
/*
 * bch2_direct_IO_read - issue an O_DIRECT read for @req into @iter
 *
 * Returns -EINVAL if the file position or length is not a multiple of the
 * filesystem block size, 0 if the read starts at or beyond i_size, the
 * result stored in dio->ret for synchronous kiocbs, and -EIOCBQUEUED for
 * asynchronous ones (completion is then reported through ki_complete() from
 * bch2_dio_read_complete()).
 *
 * The first bio comes from c->dio_read_bioset and embeds the struct dio_read
 * that tracks the whole request; if @iter does not fit in one bio, further
 * bios are allocated from c->bio_read and each takes a closure ref on the dio.
 */
static int bch2_direct_IO_read ( struct kiocb * req , struct iov_iter * iter )
{
struct file * file = req - > ki_filp ;
struct bch_inode_info * inode = file_bch_inode ( file ) ;
struct bch_fs * c = inode - > v . i_sb - > s_fs_info ;
2022-11-24 04:14:55 +03:00
struct bch_io_opts opts ;
2017-03-17 09:18:50 +03:00
struct dio_read * dio ;
struct bio * bio ;
loff_t offset = req - > ki_pos ;
bool sync = is_sync_kiocb ( req ) ;
size_t shorten ;
ssize_t ret ;
2022-11-24 04:14:55 +03:00
bch2_inode_opts_get ( & opts , c , & inode - > ei_inode ) ;
2017-03-17 09:18:50 +03:00
/* Both the offset and the length must be block aligned: */
if ( ( offset | iter - > count ) & ( block_bytes ( c ) - 1 ) )
return - EINVAL ;
/* Number of bytes we will report: clamp the request to i_size. */
ret = min_t ( loff_t , iter - > count ,
max_t ( loff_t , 0 , i_size_read ( & inode - > v ) - offset ) ) ;
if ( ! ret )
return ret ;
/*
 * Temporarily trim the iterator so we only read up to i_size rounded up
 * to a block; the trimmed amount is added back before returning.
 */
shorten = iov_iter_count ( iter ) - round_up ( ret , block_bytes ( c ) ) ;
iter - > count - = shorten ;
bio = bio_alloc_bioset ( NULL ,
2022-03-08 21:52:58 +03:00
bio_iov_vecs_to_alloc ( iter , BIO_MAX_VECS ) ,
2017-03-17 09:18:50 +03:00
REQ_OP_READ ,
GFP_KERNEL ,
& c - > dio_read_bioset ) ;
bio - > bi_end_io = bch2_direct_IO_read_endio ;
/* The dio_read is the container of this first bio. */
dio = container_of ( bio , struct dio_read , rbio . bio ) ;
closure_init ( & dio - > cl , NULL ) ;
/*
 * This is a _really_ horrible hack just to avoid an atomic sub at the
 * end: the closure's remaining count is set by hand, differently for
 * the async and sync cases.
 */
if ( ! sync ) {
set_closure_fn ( & dio - > cl , bch2_dio_read_complete , NULL ) ;
atomic_set ( & dio - > cl . remaining ,
CLOSURE_REMAINING_INITIALIZER -
CLOSURE_RUNNING +
CLOSURE_DESTRUCTOR ) ;
} else {
atomic_set ( & dio - > cl . remaining ,
CLOSURE_REMAINING_INITIALIZER + 1 ) ;
}
dio - > req = req ;
dio - > ret = ret ;
2021-01-21 22:42:23 +03:00
/*
 * This is one of the sketchier things I've encountered: we have to skip
 * the dirtying of requests that are internal from the kernel (i.e. from
 * loopback), because we'll deadlock on page_lock.
 */
dio - > should_dirty = iter_is_iovec ( iter ) ;
2017-03-17 09:18:50 +03:00
/* Jump into the loop body so the first bio skips the allocation below. */
goto start ;
while ( iter - > count ) {
bio = bio_alloc_bioset ( NULL ,
2022-03-08 21:52:58 +03:00
bio_iov_vecs_to_alloc ( iter , BIO_MAX_VECS ) ,
2017-03-17 09:18:50 +03:00
REQ_OP_READ ,
GFP_KERNEL ,
& c - > bio_read ) ;
bio - > bi_end_io = bch2_direct_IO_read_split_endio ;
start :
bio - > bi_opf = REQ_OP_READ | REQ_SYNC ;
bio - > bi_iter . bi_sector = offset > > 9 ;
bio - > bi_private = dio ;
ret = bio_iov_iter_get_pages ( bio , iter ) ;
if ( ret < 0 ) {
/* XXX: fault inject this path */
/* Complete the bio with an error status; its endio handler records it in dio->ret. */
bio - > bi_status = BLK_STS_RESOURCE ;
bio_endio ( bio ) ;
break ;
}
offset + = bio - > bi_iter . bi_size ;
2021-01-21 22:42:23 +03:00
if ( dio - > should_dirty )
bio_set_pages_dirty ( bio ) ;
2017-03-17 09:18:50 +03:00
/* More data left means another bio will follow: take a ref for it now. */
if ( iter - > count )
closure_get ( & dio - > cl ) ;
2021-03-13 04:30:39 +03:00
bch2_read ( c , rbio_init ( bio , opts ) , inode_inum ( inode ) ) ;
2017-03-17 09:18:50 +03:00
}
/* Undo the temporary trim applied above. */
iter - > count + = shorten ;
if ( sync ) {
/* Wait for every bio, then tear the dio down ourselves. */
closure_sync ( & dio - > cl ) ;
closure_debug_destroy ( & dio - > cl ) ;
ret = dio - > ret ;
2021-01-21 22:42:23 +03:00
bio_check_or_release ( & dio - > rbio . bio , dio - > should_dirty ) ;
2017-03-17 09:18:50 +03:00
return ret ;
} else {
return - EIOCBQUEUED ;
}
}
ssize_t bch2_read_iter ( struct kiocb * iocb , struct iov_iter * iter )
{
struct file * file = iocb - > ki_filp ;
struct bch_inode_info * inode = file_bch_inode ( file ) ;
struct address_space * mapping = file - > f_mapping ;
size_t count = iov_iter_count ( iter ) ;
ssize_t ret ;
if ( ! count )
return 0 ; /* skip atime */
if ( iocb - > ki_flags & IOCB_DIRECT ) {
struct blk_plug plug ;
2022-11-02 23:45:28 +03:00
if ( unlikely ( mapping - > nrpages ) ) {
ret = filemap_write_and_wait_range ( mapping ,
iocb - > ki_pos ,
iocb - > ki_pos + count - 1 ) ;
if ( ret < 0 )
2022-09-18 22:43:50 +03:00
goto out ;
2022-11-02 23:45:28 +03:00
}
2017-03-17 09:18:50 +03:00
file_accessed ( file ) ;
blk_start_plug ( & plug ) ;
ret = bch2_direct_IO_read ( iocb , iter ) ;
blk_finish_plug ( & plug ) ;
if ( ret > = 0 )
iocb - > ki_pos + = ret ;
} else {
2022-11-04 20:25:57 +03:00
bch2_pagecache_add_get ( inode ) ;
2017-03-17 09:18:50 +03:00
ret = generic_file_read_iter ( iocb , iter ) ;
2022-11-04 20:25:57 +03:00
bch2_pagecache_add_put ( inode ) ;
2017-03-17 09:18:50 +03:00
}
2022-09-18 22:43:50 +03:00
out :
return bch2_err_class ( ret ) ;
2017-03-17 09:18:50 +03:00
}
/* O_DIRECT writes */
2021-03-16 07:28:17 +03:00
/*
 * bch2_check_range_allocated - check whether a range is already allocated
 *
 * Walks the extents btree over [@offset, @offset + @size) of @inum in the
 * subvolume's current snapshot and returns true only if every key seen:
 *  - belongs to exactly that snapshot,
 *  - has at least @nr_replicas replicas, and
 *  - is not compressed, unless @compressed is true.
 *
 * Returns false on any lookup error other than a transaction restart, which
 * is retried.
 */
static bool bch2_check_range_allocated ( struct bch_fs * c , subvol_inum inum ,
u64 offset , u64 size ,
unsigned nr_replicas , bool compressed )
{
struct btree_trans trans ;
struct btree_iter iter ;
struct bkey_s_c k ;
u64 end = offset + size ;
u32 snapshot ;
bool ret = true ;
int err ;
bch2_trans_init ( & trans , c , 0 , 0 ) ;
retry :
bch2_trans_begin ( & trans ) ;
err = bch2_subvolume_get_snapshot ( & trans , inum . subvol , & snapshot ) ;
if ( err )
goto err ;
2021-10-21 19:05:21 +03:00
for_each_btree_key_norestart ( & trans , iter , BTREE_ID_extents ,
2021-03-16 07:28:17 +03:00
SPOS ( inum . inum , offset , snapshot ) ,
BTREE_ITER_SLOTS , k , err ) {
2022-11-24 11:12:22 +03:00
/* Stop once the key starts at or past the end of the range. */
if ( bkey_ge ( bkey_start_pos ( k . k ) , POS ( inum . inum , end ) ) )
2021-03-16 07:28:17 +03:00
break ;
2021-03-13 04:30:39 +03:00
if ( k . k - > p . snapshot ! = snapshot | |
nr_replicas > bch2_bkey_replicas ( c , k ) | |
2021-03-16 07:28:17 +03:00
( ! compressed & & bch2_bkey_sectors_compressed ( k ) ) ) {
ret = false ;
break ;
}
}
/* Remember how far we got, so a retry resumes from here. */
offset = iter . pos . offset ;
bch2_trans_iter_exit ( & trans , & iter ) ;
err :
2022-07-18 06:06:38 +03:00
if ( bch2_err_matches ( err , BCH_ERR_transaction_restart ) )
2021-03-16 07:28:17 +03:00
goto retry ;
bch2_trans_exit ( & trans ) ;
return err ? false : ret ;
}
2022-11-01 03:30:27 +03:00
/*
 * Check whether the range covered by the dio's current bio is already
 * allocated with the replication/compression settings of the write op;
 * see bch2_check_range_allocated().
 */
static noinline bool bch2_dio_write_check_allocated(struct dio_write *dio)
{
	struct bch_write_op *op = &dio->op;

	return bch2_check_range_allocated(op->c, inode_inum(dio->inode),
					  op->pos.offset,
					  bio_sectors(&op->wbio.bio),
					  op->opts.data_replicas,
					  op->opts.compression != 0);
}
2022-11-03 07:29:43 +03:00
static void bch2_dio_write_loop_async ( struct bch_write_op * ) ;
static __always_inline long bch2_dio_write_done ( struct dio_write * dio ) ;
2017-03-17 09:18:50 +03:00
/*
* We ' re going to return - EIOCBQUEUED , but we haven ' t finished consuming the
* iov_iter yet , so we need to stash a copy of the iovec : it might be on the
* caller ' s stack , we ' re not guaranteed that it will live for the duration of
* the IO :
*/
static noinline int bch2_dio_write_copy_iov ( struct dio_write * dio )
{
struct iovec * iov = dio - > inline_vecs ;
/*
* iov_iter has a single embedded iovec - nothing to do :
*/
if ( iter_is_ubuf ( & dio - > iter ) )
return 0 ;
/*
* We don ' t currently handle non - iovec iov_iters here - return an error ,
* and we ' ll fall back to doing the IO synchronously :
*/
if ( ! iter_is_iovec ( & dio - > iter ) )
return - 1 ;
if ( dio - > iter . nr_segs > ARRAY_SIZE ( dio - > inline_vecs ) ) {
iov = kmalloc_array ( dio - > iter . nr_segs , sizeof ( * iov ) ,
GFP_KERNEL ) ;
if ( unlikely ( ! iov ) )
return - ENOMEM ;
dio - > free_iov = true ;
}
memcpy ( iov , dio - > iter . __iov , dio - > iter . nr_segs * sizeof ( * iov ) ) ;
dio - > iter . __iov = iov ;
return 0 ;
}
2022-11-03 07:29:43 +03:00
static void bch2_dio_write_flush_done ( struct closure * cl )
{
struct dio_write * dio = container_of ( cl , struct dio_write , op . cl ) ;
struct bch_fs * c = dio - > op . c ;
closure_debug_destroy ( cl ) ;
dio - > op . error = bch2_journal_error ( & c - > journal ) ;
bch2_dio_write_done ( dio ) ;
}
static noinline void bch2_dio_write_flush ( struct dio_write * dio )
{
struct bch_fs * c = dio - > op . c ;
struct bch_inode_unpacked inode ;
int ret ;
dio - > flush = 0 ;
closure_init ( & dio - > op . cl , NULL ) ;
if ( ! dio - > op . error ) {
ret = bch2_inode_find_by_inum ( c , inode_inum ( dio - > inode ) , & inode ) ;
if ( ret )
dio - > op . error = ret ;
else
bch2_journal_flush_seq_async ( & c - > journal , inode . bi_journal_seq , & dio - > op . cl ) ;
}
if ( dio - > sync ) {
closure_sync ( & dio - > op . cl ) ;
closure_debug_destroy ( & dio - > op . cl ) ;
} else {
continue_at ( & dio - > op . cl , bch2_dio_write_flush_done , NULL ) ;
}
}
2020-06-30 01:22:06 +03:00
2022-11-01 03:30:27 +03:00
/*
 * bch2_dio_write_done - final teardown of a direct write
 *
 * If a flush is still pending (dio->flush), it is started first; for async
 * dios we return -EIOCBQUEUED and get called again from
 * bch2_dio_write_flush_done() with dio->flush cleared.
 *
 * Otherwise: drop the pagecache block lock, free a heap-allocated iovec
 * copy, compute the result (op error, else bytes written), release the bio
 * and end the inode's dio count.
 *
 * Returns the result for sync dios; for async dios the result is delivered
 * through ki_complete() and -EIOCBQUEUED is returned.
 */
static __always_inline long bch2_dio_write_done ( struct dio_write * dio )
{
/*
 * These are read into locals up front and used after bio_put() below.
 * NOTE(review): the dio is embedded in the bio (see container_of in
 * bch2_direct_write), so presumably it must not be touched after bio_put().
 */
struct kiocb * req = dio - > req ;
struct bch_inode_info * inode = dio - > inode ;
bool sync = dio - > sync ;
2022-11-03 07:29:43 +03:00
long ret ;
if ( unlikely ( dio - > flush ) ) {
bch2_dio_write_flush ( dio ) ;
if ( ! sync )
return - EIOCBQUEUED ;
}
2022-11-01 03:30:27 +03:00
2022-11-04 20:25:57 +03:00
bch2_pagecache_block_put ( inode ) ;
2022-11-01 03:30:27 +03:00
if ( dio - > free_iov )
kfree ( dio - > iter . __iov ) ;
2022-11-03 07:29:43 +03:00
/* dio->written is in 512-byte sectors; convert to bytes. */
ret = dio - > op . error ? : ( ( long ) dio - > written < < 9 ) ;
2022-11-01 03:30:27 +03:00
bio_put ( & dio - > op . wbio . bio ) ;
/* inode->i_dio_count is our ref on inode and thus bch_fs */
inode_dio_end ( & inode - > v ) ;
if ( ret < 0 )
ret = bch2_err_class ( ret ) ;
if ( ! sync ) {
req - > ki_complete ( req , ret ) ;
ret = - EIOCBQUEUED ;
}
return ret ;
}
static __always_inline void bch2_dio_write_end ( struct dio_write * dio )
{
struct bch_fs * c = dio - > op . c ;
struct kiocb * req = dio - > req ;
struct bch_inode_info * inode = dio - > inode ;
struct bio * bio = & dio - > op . wbio . bio ;
2022-11-14 06:43:37 +03:00
req - > ki_pos + = ( u64 ) dio - > op . written < < 9 ;
dio - > written + = dio - > op . written ;
2022-11-01 03:30:27 +03:00
2022-11-14 06:43:37 +03:00
if ( dio - > extending ) {
spin_lock ( & inode - > v . i_lock ) ;
if ( req - > ki_pos > inode - > v . i_size )
i_size_write ( & inode - > v , req - > ki_pos ) ;
spin_unlock ( & inode - > v . i_lock ) ;
}
if ( dio - > op . i_sectors_delta | | dio - > quota_res . sectors ) {
mutex_lock ( & inode - > ei_quota_lock ) ;
__i_sectors_acct ( c , inode , & dio - > quota_res , dio - > op . i_sectors_delta ) ;
__bch2_quota_reservation_put ( c , inode , & dio - > quota_res ) ;
mutex_unlock ( & inode - > ei_quota_lock ) ;
}
2022-11-01 03:30:27 +03:00
bio_release_pages ( bio , false ) ;
if ( unlikely ( dio - > op . error ) )
set_bit ( EI_INODE_ERROR , & inode - > ei_flags ) ;
}
2022-11-25 07:52:28 +03:00
/*
 * bch2_dio_write_loop - main loop of a direct write
 *
 * Each iteration pins as many user pages as fit in the dio's bio, trims the
 * bio to a whole number of blocks, sets up the write op, takes quota and
 * disk reservations and submits the write via bch2_write().
 *
 * For sync dios the loop continues until the iterator is consumed or an
 * error occurs, then finishes through bch2_dio_write_done().  For async
 * dios it returns -EIOCBQUEUED after submitting one bio; the op's end_io
 * callback (bch2_dio_write_loop_async) continues the work.
 *
 * Returns whatever bch2_dio_write_done() returns, or -EIOCBQUEUED.
 */
static __always_inline long bch2_dio_write_loop ( struct dio_write * dio )
2017-03-17 09:18:50 +03:00
{
2022-11-01 03:30:27 +03:00
struct bch_fs * c = dio - > op . c ;
2017-03-17 09:18:50 +03:00
struct kiocb * req = dio - > req ;
2022-11-01 03:30:27 +03:00
struct address_space * mapping = dio - > mapping ;
struct bch_inode_info * inode = dio - > inode ;
2022-11-24 04:14:55 +03:00
struct bch_io_opts opts ;
2019-10-09 19:50:39 +03:00
struct bio * bio = & dio - > op . wbio . bio ;
2020-11-11 20:33:12 +03:00
unsigned unaligned , iter_count ;
bool sync = dio - > sync , dropped_locks ;
2017-03-17 09:18:50 +03:00
long ret ;
2022-11-24 04:14:55 +03:00
bch2_inode_opts_get ( & opts , c , & inode - > ei_inode ) ;
2017-03-17 09:18:50 +03:00
while ( 1 ) {
2020-11-11 20:33:12 +03:00
/* Remembered for the invalidate range below, before the iterator advances. */
iter_count = dio - > iter . count ;
2022-11-01 03:30:27 +03:00
EBUG_ON ( current - > faults_disabled_mapping ) ;
2017-03-17 09:18:50 +03:00
/*
 * Publish the mapping we are writing to in the task struct while user
 * pages are being pinned; the low bit is read back afterwards via
 * fdm_dropped_locks().
 */
current - > faults_disabled_mapping = mapping ;
ret = bio_iov_iter_get_pages ( bio , & dio - > iter ) ;
2020-11-11 20:33:12 +03:00
dropped_locks = fdm_dropped_locks ( ) ;
2017-03-17 09:18:50 +03:00
current - > faults_disabled_mapping = NULL ;
2020-11-11 20:33:12 +03:00
/*
 * If the fault handler returned an error but also signalled
 * that it dropped & retook ei_pagecache_lock, we just need to
 * re-shoot down the page cache and retry:
 */
if ( dropped_locks & & ret )
ret = 0 ;
2017-03-17 09:18:50 +03:00
if ( unlikely ( ret < 0 ) )
goto err ;
2020-11-11 20:33:12 +03:00
if ( unlikely ( dropped_locks ) ) {
ret = write_invalidate_inode_pages_range ( mapping ,
req - > ki_pos ,
req - > ki_pos + iter_count - 1 ) ;
if ( unlikely ( ret ) )
goto err ;
/* Nothing was pinned this time round: try again. */
if ( ! bio - > bi_iter . bi_size )
continue ;
}
2019-09-22 22:02:05 +03:00
/* Trim the bio to a block multiple and give the tail back to the iterator. */
unaligned = bio - > bi_iter . bi_size & ( block_bytes ( c ) - 1 ) ;
bio - > bi_iter . bi_size - = unaligned ;
iov_iter_revert ( & dio - > iter , unaligned ) ;
if ( ! bio - > bi_iter . bi_size ) {
/*
 * bio_iov_iter_get_pages was only able to get <
 * blocksize worth of pages:
 */
ret = - EFAULT ;
goto err ;
}
2022-11-24 04:14:55 +03:00
bch2_write_op_init ( & dio - > op , c , opts ) ;
2022-11-01 03:30:27 +03:00
/* Async dios continue from the write op's end_io callback. */
dio - > op . end_io = sync
? NULL
: bch2_dio_write_loop_async ;
2020-06-30 01:22:06 +03:00
dio - > op . target = dio - > op . opts . foreground_target ;
dio - > op . write_point = writepoint_hashed ( ( unsigned long ) current ) ;
dio - > op . nr_replicas = dio - > op . opts . data_replicas ;
2021-03-13 04:30:39 +03:00
dio - > op . subvol = inode - > ei_subvol ;
2020-06-30 01:22:06 +03:00
/* Write position in 512-byte sectors. */
dio - > op . pos = POS ( inode - > v . i_ino , ( u64 ) req - > ki_pos > > 9 ) ;
2022-10-29 22:54:17 +03:00
if ( sync )
dio - > op . flags | = BCH_WRITE_SYNC ;
2021-05-20 22:49:23 +03:00
dio - > op . flags | = BCH_WRITE_CHECK_ENOSPC ;
2020-06-30 01:22:06 +03:00
2022-11-14 06:43:37 +03:00
ret = bch2_quota_reservation_add ( c , inode , & dio - > quota_res ,
bio_sectors ( bio ) , true ) ;
if ( unlikely ( ret ) )
goto err ;
2020-06-30 01:22:06 +03:00
/*
 * A failed disk reservation is only fatal if the range is not already
 * allocated; otherwise the write proceeds without one.
 */
ret = bch2_disk_reservation_get ( c , & dio - > op . res , bio_sectors ( bio ) ,
dio - > op . opts . data_replicas , 0 ) ;
if ( unlikely ( ret ) & &
2022-11-01 03:30:27 +03:00
! bch2_dio_write_check_allocated ( dio ) )
2020-06-30 01:22:06 +03:00
goto err ;
2017-03-17 09:18:50 +03:00
task_io_account_write ( bio - > bi_iter . bi_size ) ;
2022-11-01 03:30:27 +03:00
/*
 * First pass of an async dio that will need more than one bio: stash
 * a copy of the iovec; if that fails, fall back to synchronous IO.
 */
if ( unlikely ( dio - > iter . count ) & &
! dio - > sync & &
! dio - > loop & &
bch2_dio_write_copy_iov ( dio ) )
dio - > sync = sync = true ;
2017-03-17 09:18:50 +03:00
dio - > loop = true ;
2019-11-02 04:16:51 +03:00
closure_call ( & dio - > op . cl , bch2_write , NULL , NULL ) ;
2017-03-17 09:18:50 +03:00
2022-11-01 03:30:27 +03:00
if ( ! sync )
2017-03-17 09:18:50 +03:00
return - EIOCBQUEUED ;
2019-10-09 19:50:39 +03:00
2022-11-01 03:30:27 +03:00
bch2_dio_write_end ( dio ) ;
2019-10-09 19:50:39 +03:00
2022-11-01 03:30:27 +03:00
if ( likely ( ! dio - > iter . count ) | | dio - > op . error )
2017-03-17 09:18:50 +03:00
break ;
2019-11-02 04:16:51 +03:00
2017-03-17 09:18:50 +03:00
/* Reuse the same bio for the next chunk. */
bio_reset ( bio , NULL , REQ_OP_WRITE ) ;
}
2022-11-01 03:30:27 +03:00
out :
return bch2_dio_write_done ( dio ) ;
2017-03-17 09:18:50 +03:00
err :
2022-11-01 03:30:27 +03:00
/* Record the error on the op so bch2_dio_write_done() reports it. */
dio - > op . error = ret ;
2017-03-17 09:18:50 +03:00
2021-07-14 07:14:45 +03:00
bio_release_pages ( bio , false ) ;
2022-11-14 06:43:37 +03:00
bch2_quota_reservation_put ( c , inode , & dio - > quota_res ) ;
2022-11-01 03:30:27 +03:00
goto out ;
2017-03-17 09:18:50 +03:00
}
2022-11-25 07:52:28 +03:00
static noinline __cold void bch2_dio_write_continue ( struct dio_write * dio )
2017-03-17 09:18:50 +03:00
{
2022-11-01 03:30:27 +03:00
struct mm_struct * mm = dio - > mm ;
bio_reset ( & dio - > op . wbio . bio , NULL , REQ_OP_WRITE ) ;
2017-03-17 09:18:50 +03:00
2022-11-01 03:30:27 +03:00
if ( mm )
kthread_use_mm ( mm ) ;
bch2_dio_write_loop ( dio ) ;
if ( mm )
kthread_unuse_mm ( mm ) ;
2017-03-17 09:18:50 +03:00
}
2022-11-25 07:52:28 +03:00
static void bch2_dio_write_loop_async ( struct bch_write_op * op )
{
struct dio_write * dio = container_of ( op , struct dio_write , op ) ;
bch2_dio_write_end ( dio ) ;
if ( likely ( ! dio - > iter . count ) | | dio - > op . error )
bch2_dio_write_done ( dio ) ;
else
bch2_dio_write_continue ( dio ) ;
}
2017-03-17 09:18:50 +03:00
/*
 * bch2_direct_write - O_DIRECT write entry point
 *
 * Takes the inode lock, runs the generic write checks, strips privileges
 * and updates timestamps, then sets up a struct dio_write (embedded in a
 * bio from c->dio_write_bioset) and hands it to bch2_dio_write_loop().
 *
 * The inode lock is dropped early for writes that do not extend the file;
 * extending writes keep it until the write loop returns and are always
 * done synchronously.
 *
 * Returns the number of bytes written, -EIOCBQUEUED for async requests
 * that were queued, or a negative error.  A file position or length that
 * is not a multiple of the filesystem block size yields -EINVAL, matching
 * bch2_direct_IO_read().
 */
static noinline
ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
{
	struct file *file = req->ki_filp;
	struct address_space *mapping = file->f_mapping;
	struct bch_inode_info *inode = file_bch_inode(file);
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	struct dio_write *dio;
	struct bio *bio;
	bool locked = true, extending;
	ssize_t ret;

	prefetch(&c->opts);
	prefetch((void *) &c->opts + 64);
	prefetch(&inode->ei_inode);
	prefetch((void *) &inode->ei_inode + 64);

	inode_lock(&inode->v);

	ret = generic_write_checks(req, iter);
	if (unlikely(ret <= 0))
		goto err;

	ret = file_remove_privs(file);
	if (unlikely(ret))
		goto err;

	ret = file_update_time(file);
	if (unlikely(ret))
		goto err;

	/*
	 * ret is 0 at this point: set an explicit error, otherwise a
	 * misaligned write would be reported as a successful 0-byte write.
	 */
	if (unlikely((req->ki_pos|iter->count) & (block_bytes(c) - 1))) {
		ret = -EINVAL;
		goto err;
	}

	inode_dio_begin(&inode->v);
	bch2_pagecache_block_get(inode);

	extending = req->ki_pos + iter->count > inode->v.i_size;
	if (!extending) {
		inode_unlock(&inode->v);
		locked = false;
	}

	bio = bio_alloc_bioset(NULL,
			       bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS),
			       REQ_OP_WRITE,
			       GFP_KERNEL,
			       &c->dio_write_bioset);
	/* The dio_write is the container of the bio we just allocated. */
	dio = container_of(bio, struct dio_write, op.wbio.bio);
	dio->req		= req;
	dio->mapping		= mapping;
	dio->inode		= inode;
	dio->mm			= current->mm;
	dio->loop		= false;
	dio->extending		= extending;
	dio->sync		= is_sync_kiocb(req) || extending;
	dio->flush		= iocb_is_dsync(req) && !c->opts.journal_flush_disabled;
	dio->free_iov		= false;
	dio->quota_res.sectors	= 0;
	dio->written		= 0;
	dio->iter		= *iter;
	dio->op.c		= c;

	/* Write back and invalidate any cached pages over the range. */
	if (unlikely(mapping->nrpages)) {
		ret = write_invalidate_inode_pages_range(mapping,
						req->ki_pos,
						req->ki_pos + iter->count - 1);
		if (unlikely(ret))
			goto err_put_bio;
	}

	ret = bch2_dio_write_loop(dio);
err:
	if (locked)
		inode_unlock(&inode->v);
	return ret;
err_put_bio:
	/* Undo what was set up after the checks, in reverse order. */
	bch2_pagecache_block_put(inode);
	bio_put(bio);
	inode_dio_end(&inode->v);
	goto err;
}
2019-11-02 04:35:25 +03:00
ssize_t bch2_write_iter ( struct kiocb * iocb , struct iov_iter * from )
2017-03-17 09:18:50 +03:00
{
struct file * file = iocb - > ki_filp ;
2019-11-02 04:35:25 +03:00
struct bch_inode_info * inode = file_bch_inode ( file ) ;
2017-03-17 09:18:50 +03:00
ssize_t ret ;
2022-09-18 22:43:50 +03:00
if ( iocb - > ki_flags & IOCB_DIRECT ) {
ret = bch2_direct_write ( iocb , from ) ;
goto out ;
}
2017-03-17 09:18:50 +03:00
2019-11-02 04:35:25 +03:00
inode_lock ( & inode - > v ) ;
ret = generic_write_checks ( iocb , from ) ;
if ( ret < = 0 )
goto unlock ;
2017-03-17 09:18:50 +03:00
ret = file_remove_privs ( file ) ;
if ( ret )
2019-11-02 04:35:25 +03:00
goto unlock ;
2017-03-17 09:18:50 +03:00
ret = file_update_time ( file ) ;
if ( ret )
2019-11-02 04:35:25 +03:00
goto unlock ;
2017-03-17 09:18:50 +03:00
2019-11-02 04:35:25 +03:00
ret = bch2_buffered_write ( iocb , from ) ;
2017-03-17 09:18:50 +03:00
if ( likely ( ret > 0 ) )
iocb - > ki_pos + = ret ;
2019-11-02 04:35:25 +03:00
unlock :
2017-03-17 09:18:50 +03:00
inode_unlock ( & inode - > v ) ;
2019-11-02 04:35:25 +03:00
if ( ret > 0 )
2017-03-17 09:18:50 +03:00
ret = generic_write_sync ( iocb , ret ) ;
2022-09-18 22:43:50 +03:00
out :
return bch2_err_class ( ret ) ;
2017-03-17 09:18:50 +03:00
}
/* fsync: */
2021-11-05 22:17:13 +03:00
/*
 * Flush the journal up to the sequence number recorded in inode @inum.
 *
 * inode->ei_inode.bi_journal_seq won't be up to date since it's set in an
 * insert trigger: look up the btree inode instead.
 *
 * Returns 0 without doing anything when journal flushes are disabled,
 * otherwise the lookup error or the result of the journal flush.
 */
static int bch2_flush_inode(struct bch_fs *c, subvol_inum inum)
{
	struct bch_inode_unpacked inode;
	int ret;

	if (c->opts.journal_flush_disabled)
		return 0;

	ret = bch2_inode_find_by_inum(c, inum, &inode);

	return ret ?: bch2_journal_flush_seq(&c->journal, inode.bi_journal_seq);
}
2017-03-17 09:18:50 +03:00
2021-11-05 22:17:13 +03:00
/*
 * bch2_fsync - fsync entry point
 *
 * Always performs all three steps - data writeback, inode metadata sync
 * and journal flush - and reports the first error among them (in that
 * order), passed through bch2_err_class().
 */
int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct bch_inode_info *inode = file_bch_inode(file);
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	int data_err, meta_err, journal_err, ret;

	data_err	= file_write_and_wait_range(file, start, end);
	meta_err	= sync_inode_metadata(&inode->v, 1);
	journal_err	= bch2_flush_inode(c, inode_inum(inode));

	if (data_err)
		ret = data_err;
	else if (meta_err)
		ret = meta_err;
	else
		ret = journal_err;

	return bch2_err_class(ret);
}
/* truncate: */
2021-03-16 07:28:17 +03:00
/*
 * range_has_data - check whether any data extent exists in [@start, @end]
 *
 * Looks up the current snapshot of @subvol and walks the extents btree from
 * @start up to @end.
 *
 * Returns 1 if a key for which bkey_extent_is_data() is true was found, 0 if
 * none was, or a negative error.  Transaction restarts are retried, resuming
 * from the position the iterator had reached.
 */
static inline int range_has_data ( struct bch_fs * c , u32 subvol ,
struct bpos start ,
struct bpos end )
2017-03-17 09:18:50 +03:00
{
2019-03-25 22:10:15 +03:00
struct btree_trans trans ;
2021-08-30 22:18:31 +03:00
struct btree_iter iter ;
2017-03-17 09:18:50 +03:00
struct bkey_s_c k ;
int ret = 0 ;
2019-05-15 17:54:43 +03:00
bch2_trans_init ( & trans , c , 0 , 0 ) ;
2021-03-16 07:28:17 +03:00
retry :
bch2_trans_begin ( & trans ) ;
/* The snapshot ID is written straight into the start position. */
ret = bch2_subvolume_get_snapshot ( & trans , subvol , & start . snapshot ) ;
if ( ret )
goto err ;
2019-03-25 22:10:15 +03:00
2022-10-11 11:32:41 +03:00
for_each_btree_key_upto_norestart ( & trans , iter , BTREE_ID_extents , start , end , 0 , k , ret )
2017-03-17 09:18:50 +03:00
if ( bkey_extent_is_data ( k . k ) ) {
ret = 1 ;
break ;
}
2021-03-16 07:28:17 +03:00
/* Remember how far we got, so a retry resumes from here. */
start = iter . pos ;
2021-08-30 22:18:31 +03:00
bch2_trans_iter_exit ( & trans , & iter ) ;
2021-03-16 07:28:17 +03:00
err :
2022-07-18 06:06:38 +03:00
if ( bch2_err_matches ( ret , BCH_ERR_transaction_restart ) )
2021-03-16 07:28:17 +03:00
goto retry ;
2017-03-17 09:18:50 +03:00
2021-10-19 22:08:00 +03:00
bch2_trans_exit ( & trans ) ;
return ret ;
2017-03-17 09:18:50 +03:00
}
/*
 * __bch2_truncate_page - zero the part of page @index inside [@start, @end)
 *
 * Only does work when @index is the page containing @start or @end and the
 * range boundary actually falls inside that page; pages entirely at or above
 * i_size are skipped.  If the page is not in the page cache it is only
 * created when the extents btree has data for it.
 *
 * Whole blocks inside the zeroed range are marked unallocated in the page
 * state, the byte range is zeroed, and the page is redirtied.
 *
 * Returns a negative error, 0 if nothing was done, or otherwise 0/1
 * according to whether the sector holding the last byte of the file within
 * this page is in state SECTOR_DIRTY or higher (see the comment before that
 * computation).
 */
static int __bch2_truncate_page ( struct bch_inode_info * inode ,
pgoff_t index , loff_t start , loff_t end )
{
struct bch_fs * c = inode - > v . i_sb - > s_fs_info ;
struct address_space * mapping = inode - > v . i_mapping ;
2019-08-06 18:19:58 +03:00
struct bch_page_state * s ;
2017-03-17 09:18:50 +03:00
/* Byte offsets of the range within a page; end_offset is in 1..PAGE_SIZE. */
unsigned start_offset = start & ( PAGE_SIZE - 1 ) ;
unsigned end_offset = ( ( end - 1 ) & ( PAGE_SIZE - 1 ) ) + 1 ;
2019-08-06 18:19:58 +03:00
unsigned i ;
2017-03-17 09:18:50 +03:00
struct page * page ;
2021-11-22 20:47:20 +03:00
s64 i_sectors_delta = 0 ;
2017-03-17 09:18:50 +03:00
int ret = 0 ;
/* Page boundary? Nothing to do */
if ( ! ( ( index = = start > > PAGE_SHIFT & & start_offset ) | |
( index = = end > > PAGE_SHIFT & & end_offset ! = PAGE_SIZE ) ) )
return 0 ;
/* Above i_size? */
if ( index < < PAGE_SHIFT > = inode - > v . i_size )
return 0 ;
page = find_lock_page ( mapping , index ) ;
if ( ! page ) {
/*
 * XXX: we're doing two index lookups when we end up reading the
 * page
 */
2021-03-16 07:28:17 +03:00
ret = range_has_data ( c , inode - > ei_subvol ,
2022-10-11 11:32:41 +03:00
POS ( inode - > v . i_ino , ( index < < PAGE_SECTORS_SHIFT ) ) ,
POS ( inode - > v . i_ino , ( index < < PAGE_SECTORS_SHIFT ) + PAGE_SECTORS ) ) ;
2017-03-17 09:18:50 +03:00
/* Error, or no data on disk for this page: nothing to zero. */
if ( ret < = 0 )
return ret ;
page = find_or_create_page ( mapping , index , GFP_KERNEL ) ;
if ( unlikely ( ! page ) ) {
ret = - ENOMEM ;
goto out ;
}
}
2019-08-06 18:19:58 +03:00
s = bch2_page_state_create ( page , 0 ) ;
if ( ! s ) {
ret = - ENOMEM ;
goto unlock ;
}
2017-03-17 09:18:50 +03:00
if ( ! PageUptodate ( page ) ) {
ret = bch2_read_single_page ( page , mapping ) ;
if ( ret )
goto unlock ;
}
2019-08-06 18:19:58 +03:00
/* Clamp the range to this page when a boundary lies in a different page. */
if ( index ! = start > > PAGE_SHIFT )
start_offset = 0 ;
if ( index ! = end > > PAGE_SHIFT )
end_offset = PAGE_SIZE ;
/* Only blocks that lie entirely inside the range are marked unallocated. */
for ( i = round_up ( start_offset , block_bytes ( c ) ) > > 9 ;
i < round_down ( end_offset , block_bytes ( c ) ) > > 9 ;
i + + ) {
s - > s [ i ] . nr_replicas = 0 ;
2021-11-22 20:47:20 +03:00
if ( s - > s [ i ] . state = = SECTOR_DIRTY )
i_sectors_delta - - ;
2019-08-06 18:19:58 +03:00
s - > s [ i ] . state = SECTOR_UNALLOCATED ;
}
2021-11-22 20:47:20 +03:00
i_sectors_acct ( c , inode , NULL , i_sectors_delta ) ;
2021-11-06 20:39:42 +03:00
/*
 * Caller needs to know whether this page will be written out by
 * writeback - doing an i_size update if necessary - or whether it will
 * be responsible for the i_size update:
 */
ret = s - > s [ ( min_t ( u64 , inode - > v . i_size - ( index < < PAGE_SHIFT ) ,
PAGE_SIZE ) - 1 ) > > 9 ] . state > = SECTOR_DIRTY ;
2019-08-06 18:19:58 +03:00
zero_user_segment ( page , start_offset , end_offset ) ;
2017-03-17 09:18:50 +03:00
/*
 * Bit of a hack - we don't want truncate to fail due to -ENOSPC.
 *
 * XXX: because we aren't currently tracking whether the page has actual
 * data in it (vs. just 0s, or only partially written) this is wrong. ick.
 */
2021-11-06 20:39:42 +03:00
BUG_ON ( bch2_get_page_disk_reservation ( c , inode , page , false ) ) ;
2017-03-17 09:18:50 +03:00
2020-10-09 07:09:20 +03:00
/*
 * This removes any writeable userspace mappings; we need to force
 * .page_mkwrite to be called again before any mmapped writes, to
 * redirty the full page:
 */
page_mkclean ( page ) ;
2019-07-29 20:38:38 +03:00
filemap_dirty_folio ( mapping , page_folio ( page ) ) ;
2017-03-17 09:18:50 +03:00
unlock :
unlock_page ( page ) ;
put_page ( page ) ;
out :
return ret ;
}
/*
 * Zero the tail of the page containing @from: from @from up to the next
 * page boundary.  Return value is that of __bch2_truncate_page().
 */
static int bch2_truncate_page(struct bch_inode_info *inode, loff_t from)
{
	pgoff_t index = from >> PAGE_SHIFT;
	loff_t page_end = round_up(from, PAGE_SIZE);

	return __bch2_truncate_page(inode, index, from, page_end);
}
2021-11-06 20:39:42 +03:00
/*
 * Zero the partial pages at both ends of [@start, @end).
 *
 * The page containing @start is handled first; if that did not fail and
 * @end lies in a different page, the page containing @end is handled too
 * and its result is returned.
 */
static int bch2_truncate_pages(struct bch_inode_info *inode,
			       loff_t start, loff_t end)
{
	loff_t first = start >> PAGE_SHIFT;
	loff_t last = end >> PAGE_SHIFT;
	int ret;

	ret = __bch2_truncate_page(inode, first, start, end);
	if (ret < 0 || first == last)
		return ret;

	return __bch2_truncate_page(inode, last, start, end);
}
2021-06-15 05:29:54 +03:00
static int bch2_extend ( struct mnt_idmap * idmap ,
struct bch_inode_info * inode ,
2019-10-09 18:12:48 +03:00
struct bch_inode_unpacked * inode_u ,
struct iattr * iattr )
2017-03-17 09:18:50 +03:00
{
struct address_space * mapping = inode - > v . i_mapping ;
int ret ;
2019-10-09 18:12:48 +03:00
/*
* sync appends :
2019-10-09 19:11:00 +03:00
*
* this has to be done _before_ extending i_size :
2019-10-09 18:12:48 +03:00
*/
ret = filemap_write_and_wait_range ( mapping , inode_u - > bi_size , S64_MAX ) ;
2017-03-17 09:18:50 +03:00
if ( ret )
return ret ;
truncate_setsize ( & inode - > v , iattr - > ia_size ) ;
2021-06-15 05:29:54 +03:00
return bch2_setattr_nonsize ( idmap , inode , iattr ) ;
2017-03-17 09:18:50 +03:00
}
2018-08-09 04:09:31 +03:00
static int bch2_truncate_finish_fn ( struct bch_inode_info * inode ,
struct bch_inode_unpacked * bi ,
void * p )
{
bi - > bi_flags & = ~ BCH_INODE_I_SIZE_DIRTY ;
return 0 ;
}
static int bch2_truncate_start_fn ( struct bch_inode_info * inode ,
struct bch_inode_unpacked * bi , void * p )
{
u64 * new_i_size = p ;
bi - > bi_flags | = BCH_INODE_I_SIZE_DIRTY ;
bi - > bi_size = * new_i_size ;
return 0 ;
}
2021-06-15 05:29:54 +03:00
/*
 * bch2_truncate - change the size of a file to iattr->ia_size
 *
 * Waits for in-flight direct IO and holds the pagecache block lock for the
 * duration.  Growing the file is delegated to bch2_extend().  Shrinking:
 *  1. zero the tail of the page containing the new size,
 *  2. write back the pages that must reach disk first,
 *  3. write the new size to the inode with I_SIZE_DIRTY set,
 *  4. update the in-memory size and punch out all extents past it,
 *  5. clear I_SIZE_DIRTY and apply the remaining attributes.
 *
 * Returns 0 or a negative error, passed through bch2_err_class().
 */
int bch2_truncate ( struct mnt_idmap * idmap ,
struct bch_inode_info * inode , struct iattr * iattr )
2017-03-17 09:18:50 +03:00
{
struct bch_fs * c = inode - > v . i_sb - > s_fs_info ;
struct address_space * mapping = inode - > v . i_mapping ;
2019-10-09 18:12:48 +03:00
struct bch_inode_unpacked inode_u ;
2018-08-09 04:09:31 +03:00
u64 new_i_size = iattr - > ia_size ;
2019-10-10 19:47:22 +03:00
s64 i_sectors_delta = 0 ;
2017-03-17 09:18:50 +03:00
int ret = 0 ;
2021-06-15 05:29:54 +03:00
/*
 * If the truncate call will change the size of the file, the
 * cmtimes should be updated. If the size will not change, we
 * do not need to update the cmtimes.
 */
2021-06-28 03:54:34 +03:00
if ( iattr - > ia_size ! = inode - > v . i_size ) {
if ( ! ( iattr - > ia_valid & ATTR_MTIME ) )
ktime_get_coarse_real_ts64 ( & iattr - > ia_mtime ) ;
if ( ! ( iattr - > ia_valid & ATTR_CTIME ) )
ktime_get_coarse_real_ts64 ( & iattr - > ia_ctime ) ;
iattr - > ia_valid | = ATTR_MTIME | ATTR_CTIME ;
}
2021-06-15 05:29:54 +03:00
2017-03-17 09:18:50 +03:00
inode_dio_wait ( & inode - > v ) ;
2022-11-04 20:25:57 +03:00
bch2_pagecache_block_get ( inode ) ;
2017-03-17 09:18:50 +03:00
2021-03-16 07:28:17 +03:00
/* Fetch the on-disk (btree) inode; its bi_size is compared against below. */
ret = bch2_inode_find_by_inum ( c , inode_inum ( inode ) , & inode_u ) ;
2019-12-18 21:18:33 +03:00
if ( ret )
goto err ;
/*
 * check this before next assertion; on filesystem error our normal
 * invariants are a bit broken (truncate has to truncate the page cache
 * before the inode).
 */
ret = bch2_journal_error ( & c - > journal ) ;
2019-10-09 18:12:48 +03:00
if ( ret )
goto err ;
2017-03-17 09:18:50 +03:00
2022-11-15 23:57:07 +03:00
WARN_ONCE ( ! test_bit ( EI_INODE_ERROR , & inode - > ei_flags ) & &
inode - > v . i_size < inode_u . bi_size ,
" truncate spotted in mem i_size < btree i_size: %llu < %llu \n " ,
( u64 ) inode - > v . i_size , inode_u . bi_size ) ;
2017-03-17 09:18:50 +03:00
2019-10-09 18:12:48 +03:00
/* Growing the file is handled entirely by bch2_extend(). */
if ( iattr - > ia_size > inode - > v . i_size ) {
2021-06-15 05:29:54 +03:00
ret = bch2_extend ( idmap , inode , & inode_u , iattr ) ;
2018-08-09 04:09:31 +03:00
goto err ;
2017-03-17 09:18:50 +03:00
}
2021-06-15 05:29:54 +03:00
/* The size is handled here; bch2_setattr_nonsize() below must not see it. */
iattr - > ia_valid & = ~ ATTR_SIZE ;
2017-03-17 09:18:50 +03:00
ret = bch2_truncate_page ( inode , iattr - > ia_size ) ;
2021-11-06 20:39:42 +03:00
if ( unlikely ( ret < 0 ) )
2018-08-09 04:09:31 +03:00
goto err ;
2017-03-17 09:18:50 +03:00
2019-09-20 01:05:04 +03:00
/*
 * When extending, we're going to write the new i_size to disk
 * immediately so we need to flush anything above the current on disk
 * i_size first:
 *
 * Also, when extending we need to flush the page that i_size currently
 * straddles - if it's mapped to userspace, we need to ensure that
 * userspace has to redirty it and call .mkwrite -> set_page_dirty
 * again to allocate the part of the page that was extended.
 */
2019-10-09 18:12:48 +03:00
if ( iattr - > ia_size > inode_u . bi_size )
2017-03-17 09:18:50 +03:00
ret = filemap_write_and_wait_range ( mapping ,
2019-10-09 18:12:48 +03:00
inode_u . bi_size ,
2017-03-17 09:18:50 +03:00
iattr - > ia_size - 1 ) ;
else if ( iattr - > ia_size & ( PAGE_SIZE - 1 ) )
ret = filemap_write_and_wait_range ( mapping ,
round_down ( iattr - > ia_size , PAGE_SIZE ) ,
iattr - > ia_size - 1 ) ;
if ( ret )
2018-08-09 04:09:31 +03:00
goto err ;
2017-03-17 09:18:50 +03:00
2018-08-09 04:09:31 +03:00
/* Record the new size on disk, flagged I_SIZE_DIRTY, before punching extents. */
mutex_lock ( & inode - > ei_update_lock ) ;
ret = bch2_write_inode ( c , inode , bch2_truncate_start_fn ,
& new_i_size , 0 ) ;
mutex_unlock ( & inode - > ei_update_lock ) ;
2017-03-17 09:18:50 +03:00
if ( unlikely ( ret ) )
2018-08-09 04:09:31 +03:00
goto err ;
2017-03-17 09:18:50 +03:00
truncate_setsize ( & inode - > v , iattr - > ia_size ) ;
2021-03-13 04:30:39 +03:00
/* Drop every extent from the new size (rounded up to a block) to the end. */
ret = bch2_fpunch ( c , inode_inum ( inode ) ,
2019-08-06 18:19:58 +03:00
round_up ( iattr - > ia_size , block_bytes ( c ) ) > > 9 ,
2021-11-05 22:17:13 +03:00
U64_MAX , & i_sectors_delta ) ;
2019-10-10 19:47:22 +03:00
i_sectors_acct ( c , inode , NULL , i_sectors_delta ) ;
2022-04-16 23:06:59 +03:00
bch2_fs_inconsistent_on ( ! inode - > v . i_size & & inode - > v . i_blocks & &
! bch2_journal_error ( & c - > journal ) , c ,
" inode %lu truncated to 0 but i_blocks %llu (ondisk %lli) " ,
inode - > v . i_ino , ( u64 ) inode - > v . i_blocks ,
inode - > ei_inode . bi_sectors ) ;
2017-03-17 09:18:50 +03:00
if ( unlikely ( ret ) )
2018-08-09 04:09:31 +03:00
goto err ;
2017-03-17 09:18:50 +03:00
2018-08-09 04:09:31 +03:00
mutex_lock ( & inode - > ei_update_lock ) ;
2021-06-15 05:29:54 +03:00
/*
 * NOTE(review): the result of this bch2_write_inode() is overwritten by
 * bch2_setattr_nonsize() just below, so a failure to clear I_SIZE_DIRTY
 * is not reported - confirm whether that is intended.
 */
ret = bch2_write_inode ( c , inode , bch2_truncate_finish_fn , NULL , 0 ) ;
2018-08-09 04:09:31 +03:00
mutex_unlock ( & inode - > ei_update_lock ) ;
2021-06-15 05:29:54 +03:00
ret = bch2_setattr_nonsize ( idmap , inode , iattr ) ;
2018-08-09 04:09:31 +03:00
err :
2022-11-04 20:25:57 +03:00
bch2_pagecache_block_put ( inode ) ;
2022-09-18 22:43:50 +03:00
return bch2_err_class ( ret ) ;
2017-03-17 09:18:50 +03:00
}
/* fallocate: */
2021-04-29 02:36:12 +03:00
static int inode_update_times_fn ( struct bch_inode_info * inode ,
struct bch_inode_unpacked * bi , void * p )
{
struct bch_fs * c = inode - > v . i_sb - > s_fs_info ;
bi - > bi_mtime = bi - > bi_ctime = bch2_current_time ( c ) ;
return 0 ;
}
2019-10-10 19:47:22 +03:00
/*
 * Punch a hole in @inode covering the byte range [@offset, @offset + @len).
 *
 * The partial blocks at either edge are handled through the page cache
 * (bch2_truncate_pages() + truncate_pagecache_range()); whole blocks inside
 * the range are removed from the extents btree with bch2_fpunch(). The inode
 * is then rewritten so that its timestamps are updated.
 *
 * The inode update is attempted even if punching the extents failed, but the
 * first error encountered is the one returned - a failed punch is never
 * reported as success.
 *
 * Returns 0 on success or a negative error code.
 */
static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len)
{
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	u64 end		= offset + len;
	u64 block_start	= round_up(offset, block_bytes(c));
	u64 block_end	= round_down(end, block_bytes(c));
	bool truncated_last_page;
	int ret, ret2;

	ret = bch2_truncate_pages(inode, offset, end);
	if (unlikely(ret < 0))
		return ret;

	/* a non negative return is a flag, not an error: */
	truncated_last_page = ret;
	ret = 0;

	truncate_pagecache_range(&inode->v, offset, end - 1);

	/* only whole blocks are dropped from the btree: */
	if (block_start < block_end) {
		s64 i_sectors_delta = 0;

		ret = bch2_fpunch(c, inode_inum(inode),
				  block_start >> 9, block_end >> 9,
				  &i_sectors_delta);
		/* account for whatever was punched, even on partial failure: */
		i_sectors_acct(c, inode, NULL, i_sectors_delta);
	}

	mutex_lock(&inode->ei_update_lock);
	if (end >= inode->v.i_size && !truncated_last_page) {
		ret2 = bch2_write_inode_size(c, inode, inode->v.i_size,
					     ATTR_MTIME|ATTR_CTIME);
	} else {
		ret2 = bch2_write_inode(c, inode, inode_update_times_fn, NULL,
					ATTR_MTIME|ATTR_CTIME);
	}
	mutex_unlock(&inode->ei_update_lock);

	/* don't let a successful inode write hide a failed punch: */
	return ret ?: ret2;
}
2019-10-10 19:47:22 +03:00
/*
 * Shared implementation of FALLOC_FL_INSERT_RANGE (@insert == true) and
 * FALLOC_FL_COLLAPSE_RANGE (@insert == false).
 *
 * Extents are moved one at a time by @len bytes: when inserting, the walk
 * starts at the end of the inode's keyspace and goes backwards, moving
 * extents to higher offsets; when collapsing, the range is punched first and
 * the walk goes forwards from the end of the punched range, moving extents to
 * lower offsets. Each step deletes the old location and inserts the shifted
 * copy in one transaction commit.
 *
 * @offset and @len must both be multiples of the filesystem block size.
 *
 * Returns 0 on success or a negative error code.
 */
static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
				   loff_t offset, loff_t len,
				   bool insert)
{
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	struct address_space *mapping = inode->v.i_mapping;
	struct bkey_buf copy;
	struct btree_trans trans;
	struct btree_iter src, dst, del;
	loff_t shift, new_size;
	u64 src_start;
	int ret = 0;

	if ((offset | len) & (block_bytes(c) - 1))
		return -EINVAL;

	if (!insert) {
		if (offset + len >= inode->v.i_size)
			return -EINVAL;

		/* collapse: start at the first extent past the removed range */
		src_start	= offset + len;
		shift		= -len;
	} else {
		if (inode->v.i_sb->s_maxbytes - inode->v.i_size < len)
			return -EFBIG;

		if (offset >= inode->v.i_size)
			return -EINVAL;

		/* insert: start from the very end and walk backwards */
		src_start	= U64_MAX;
		shift		= len;
	}

	new_size = inode->v.i_size + shift;

	ret = write_invalidate_inode_pages_range(mapping, offset, LLONG_MAX);
	if (ret)
		return ret;

	if (!insert) {
		s64 i_sectors_delta = 0;

		ret = bch2_fpunch(c, inode_inum(inode),
				  offset >> 9, (offset + len) >> 9,
				  &i_sectors_delta);
		i_sectors_acct(c, inode, NULL, i_sectors_delta);

		if (ret)
			return ret;
	} else {
		/* when inserting, the size grows before any extent is moved */
		i_size_write(&inode->v, new_size);
		mutex_lock(&inode->ei_update_lock);
		ret = bch2_write_inode_size(c, inode, new_size,
					    ATTR_MTIME|ATTR_CTIME);
		mutex_unlock(&inode->ei_update_lock);
	}

	bch2_bkey_buf_init(&copy);
	bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
	bch2_trans_iter_init(&trans, &src, BTREE_ID_extents,
			POS(inode->v.i_ino, src_start >> 9),
			BTREE_ITER_INTENT);
	bch2_trans_copy_iter(&dst, &src);
	bch2_trans_copy_iter(&del, &src);

	/* one extent (or piece of one) is moved per iteration */
	while (!ret ||
	       bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
		struct disk_reservation disk_res =
			bch2_disk_reservation_init(c, 0);
		struct bkey_i delete;
		struct bkey_s_c k;
		struct bpos next_pos;
		struct bpos move_pos = POS(inode->v.i_ino, offset >> 9);
		struct bpos atomic_end;
		unsigned trigger_flags = 0;
		u32 snapshot;

		bch2_trans_begin(&trans);

		ret = bch2_subvolume_get_snapshot(&trans,
					inode->ei_subvol, &snapshot);
		if (ret)
			continue;

		bch2_btree_iter_set_snapshot(&src, snapshot);
		bch2_btree_iter_set_snapshot(&dst, snapshot);
		bch2_btree_iter_set_snapshot(&del, snapshot);

		bch2_trans_begin(&trans);

		if (insert)
			k = bch2_btree_iter_peek_prev(&src);
		else
			k = bch2_btree_iter_peek_upto(&src,
					POS(inode->v.i_ino, U64_MAX));

		ret = bkey_err(k);
		if (ret)
			continue;

		/* ran out of extents belonging to this inode: */
		if (!k.k || k.k->p.inode != inode->v.i_ino)
			break;

		/* inserting: stop at extents that end at or before @offset */
		if (insert &&
		    bkey_le(k.k->p, POS(inode->v.i_ino, offset >> 9)))
			break;
reassemble:
		bch2_bkey_buf_reassemble(&copy, c, k);

		/* inserting: only the part at or after move_pos is moved */
		if (insert &&
		    bkey_lt(bkey_start_pos(k.k), move_pos))
			bch2_cut_front(move_pos, copy.k);

		copy.k->k.p.offset += shift >> 9;
		bch2_btree_iter_set_pos(&dst, bkey_start_pos(&copy.k->k));

		ret = bch2_extent_atomic_end(&trans, &dst, copy.k, &atomic_end);
		if (ret)
			continue;

		/* trim the copy to what bch2_extent_atomic_end() reported: */
		if (!bkey_eq(atomic_end, copy.k->k.p)) {
			if (!insert) {
				bch2_cut_back(atomic_end, copy.k);
			} else {
				move_pos = atomic_end;
				move_pos.offset -= shift >> 9;
				goto reassemble;
			}
		}

		/* deletion key covering the range the copy was taken from: */
		bkey_init(&delete.k);
		delete.k.p = copy.k->k.p;
		delete.k.size = copy.k->k.size;
		delete.k.p.offset -= shift >> 9;
		bch2_btree_iter_set_pos(&del, bkey_start_pos(&delete.k));

		if (insert)
			next_pos = bkey_start_pos(&delete.k);
		else
			next_pos = delete.k.p;

		if (copy.k->k.size != k.k->size) {
			/* We might end up splitting compressed extents: */
			unsigned nr_ptrs =
				bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(copy.k));

			ret = bch2_disk_reservation_get(c, &disk_res,
					copy.k->k.size, nr_ptrs,
					BCH_DISK_RESERVATION_NOFAIL);
			BUG_ON(ret);
		}

		ret = bch2_btree_iter_traverse(&del);
		if (!ret)
			ret = bch2_trans_update(&trans, &del, &delete,
						trigger_flags);
		if (!ret)
			ret = bch2_trans_update(&trans, &dst, copy.k,
						trigger_flags);
		if (!ret)
			ret = bch2_trans_commit(&trans, &disk_res, NULL,
						BTREE_INSERT_NOFAIL);
		bch2_disk_reservation_put(c, &disk_res);

		/* only advance once this piece has been committed: */
		if (!ret)
			bch2_btree_iter_set_pos(&src, next_pos);
	}
	bch2_trans_iter_exit(&trans, &del);
	bch2_trans_iter_exit(&trans, &dst);
	bch2_trans_iter_exit(&trans, &src);
	bch2_trans_exit(&trans);
	bch2_bkey_buf_exit(&copy, c);

	if (ret)
		return ret;

	mutex_lock(&inode->ei_update_lock);
	if (insert) {
		/* We need an inode update to update bi_journal_seq for fsync: */
		ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL,
				       ATTR_MTIME|ATTR_CTIME);
	} else {
		/* when collapsing, the size shrinks after the extents moved */
		i_size_write(&inode->v, new_size);
		ret = bch2_write_inode_size(c, inode, new_size,
					    ATTR_MTIME|ATTR_CTIME);
	}
	mutex_unlock(&inode->ei_update_lock);

	return ret;
}
2021-04-17 03:35:20 +03:00
/*
 * Walk the extent slots of @inode in [@start_sector, @end_sector) and call
 * bch2_extent_fallocate() on every slot that is not skipped.
 *
 * Slots are skipped when they are already reservations with at least
 * opts.data_replicas fully allocated pointers, or when they hold data and
 * FALLOC_FL_ZERO_RANGE is not set in @mode.
 *
 * Transaction restarts are retried. On -ENOSPC in ZERO_RANGE mode the rest of
 * the range, from where the walk stopped up to @end_sector, is punched.
 *
 * Returns 0 on success or a negative error code.
 */
static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
			     u64 start_sector, u64 end_sector)
{
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	struct btree_trans trans;
	struct btree_iter iter;
	struct bpos end_pos = POS(inode->v.i_ino, end_sector);
	struct bch_io_opts opts;
	int ret = 0;

	bch2_inode_opts_get(&opts, c, &inode->ei_inode);
	bch2_trans_init(&trans, c, BTREE_ITER_MAX, 512);

	bch2_trans_iter_init(&trans, &iter, BTREE_ID_extents,
			POS(inode->v.i_ino, start_sector),
			BTREE_ITER_SLOTS|BTREE_ITER_INTENT);

	while (!ret && bkey_lt(iter.pos, end_pos)) {
		s64 i_sectors_delta = 0;
		struct quota_res quota_res = { 0 };
		struct bkey_s_c k;
		unsigned sectors;
		u32 snapshot;

		bch2_trans_begin(&trans);

		ret = bch2_subvolume_get_snapshot(&trans,
					inode->ei_subvol, &snapshot);
		if (ret)
			goto put_quota;

		bch2_btree_iter_set_snapshot(&iter, snapshot);

		k = bch2_btree_iter_peek_slot(&iter);
		ret = bkey_err(k);
		if (ret)
			goto put_quota;

		/*
		 * Nothing to do for slots that are already reserved, or that
		 * hold data we were not asked to zero:
		 */
		if ((bkey_extent_is_reservation(k) &&
		     bch2_bkey_nr_ptrs_fully_allocated(k) >= opts.data_replicas) ||
		    (bkey_extent_is_data(k.k) &&
		     !(mode & FALLOC_FL_ZERO_RANGE))) {
			bch2_btree_iter_advance(&iter);
			continue;
		}

		/* never go past the end of the requested range: */
		sectors = bpos_min(k.k->p, end_pos).offset - iter.pos.offset;

		if (!bkey_extent_is_allocation(k.k)) {
			ret = bch2_quota_reservation_add(c, inode,
					&quota_res,
					sectors, true);
			if (unlikely(ret))
				goto put_quota;
		}

		ret = bch2_extent_fallocate(&trans, inode_inum(inode), &iter,
					    sectors, opts, &i_sectors_delta,
					    writepoint_hashed((unsigned long) current));
		if (ret)
			goto put_quota;

		i_sectors_acct(c, inode, &quota_res, i_sectors_delta);
put_quota:
		bch2_quota_reservation_put(c, inode, &quota_res);
		/* a restart just means: go around again */
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			ret = 0;
	}

	bch2_trans_unlock(&trans); /* lock ordering, before taking pagecache locks: */
	mark_pagecache_reserved(inode, start_sector, iter.pos.offset);

	if (bch2_err_matches(ret, ENOSPC) && (mode & FALLOC_FL_ZERO_RANGE)) {
		struct quota_res quota_res = { 0 };
		s64 i_sectors_delta = 0;

		/* punch the remainder of the range that was not processed */
		bch2_fpunch_at(&trans, &iter, inode_inum(inode),
			       end_sector, &i_sectors_delta);
		i_sectors_acct(c, inode, &quota_res, i_sectors_delta);
		bch2_quota_reservation_put(c, inode, &quota_res);
	}

	bch2_trans_iter_exit(&trans, &iter);
	bch2_trans_exit(&trans);
	return ret;
}
2021-03-20 03:29:11 +03:00
2021-04-17 03:35:20 +03:00
/*
 * Plain fallocate, optionally with FALLOC_FL_KEEP_SIZE and/or
 * FALLOC_FL_ZERO_RANGE, over the byte range [@offset, @offset + @len).
 *
 * Without ZERO_RANGE the range is widened to block boundaries; with
 * ZERO_RANGE the partial edge blocks are handled through the page cache and
 * the range is narrowed to whole blocks instead.
 *
 * Returns 0 on success or a negative error code; the error from the
 * allocation pass takes precedence over one from the i_size update.
 */
static long bchfs_fallocate(struct bch_inode_info *inode, int mode,
			    loff_t offset, loff_t len)
{
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	bool zero_range = mode & FALLOC_FL_ZERO_RANGE;
	bool keep_size = mode & FALLOC_FL_KEEP_SIZE;
	u64 end		= offset + len;
	u64 block_start	= round_down(offset, block_bytes(c));
	u64 block_end	= round_up(end, block_bytes(c));
	bool truncated_last_page = false;
	int ret, ret2 = 0;

	if (!keep_size && end > inode->v.i_size) {
		ret = inode_newsize_ok(&inode->v, end);
		if (ret)
			return ret;
	}

	if (zero_range) {
		ret = bch2_truncate_pages(inode, offset, end);
		if (unlikely(ret < 0))
			return ret;

		truncated_last_page = ret;

		truncate_pagecache_range(&inode->v, offset, end - 1);

		block_start	= round_up(offset, block_bytes(c));
		block_end	= round_down(end, block_bytes(c));
	}

	ret = __bchfs_fallocate(inode, mode, block_start >> 9, block_end >> 9);

	/*
	 * On -ENOSPC in ZERO_RANGE mode, we still want to do the inode update,
	 * so that the VFS cache i_size is consistent with the btree i_size:
	 */
	if (ret &&
	    !(bch2_err_matches(ret, ENOSPC) && zero_range))
		return ret;

	if (keep_size && end > inode->v.i_size)
		end = inode->v.i_size;

	if (end >= inode->v.i_size &&
	    ((zero_range && !truncated_last_page) || !keep_size)) {
		spin_lock(&inode->v.i_lock);
		i_size_write(&inode->v, end);
		spin_unlock(&inode->v.i_lock);

		mutex_lock(&inode->ei_update_lock);
		ret2 = bch2_write_inode_size(c, inode, end, 0);
		mutex_unlock(&inode->ei_update_lock);
	}

	return ret ?: ret2;
}
/*
 * fallocate() entry point: takes a filesystem write reference, the inode
 * lock and the pagecache block lock, waits for in-flight direct IO, and then
 * hands off to the handler matching @mode.
 *
 * Returns 0 on success, -EROFS if no write reference could be taken,
 * -EOPNOTSUPP for an unsupported @mode, or another negative error code.
 */
long bch2_fallocate_dispatch(struct file *file, int mode,
			     loff_t offset, loff_t len)
{
	struct bch_inode_info *inode = file_bch_inode(file);
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	long ret;

	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_fallocate))
		return -EROFS;

	inode_lock(&inode->v);
	inode_dio_wait(&inode->v);
	bch2_pagecache_block_get(inode);

	ret = file_modified(file);
	if (ret)
		goto out_unlock;

	if (!(mode & ~(FALLOC_FL_KEEP_SIZE|FALLOC_FL_ZERO_RANGE))) {
		/* only KEEP_SIZE and/or ZERO_RANGE (or no flags at all) */
		ret = bchfs_fallocate(inode, mode, offset, len);
	} else {
		/* everything else must match one exact flag combination */
		switch (mode) {
		case FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE:
			ret = bchfs_fpunch(inode, offset, len);
			break;
		case FALLOC_FL_INSERT_RANGE:
			ret = bchfs_fcollapse_finsert(inode, offset, len, true);
			break;
		case FALLOC_FL_COLLAPSE_RANGE:
			ret = bchfs_fcollapse_finsert(inode, offset, len, false);
			break;
		default:
			ret = -EOPNOTSUPP;
			break;
		}
	}
out_unlock:
	bch2_pagecache_block_put(inode);
	inode_unlock(&inode->v);
	bch2_write_ref_put(c, BCH_WRITE_REF_fallocate);

	return bch2_err_class(ret);
}
2022-10-11 11:32:41 +03:00
/*
* Take a quota reservation for unallocated blocks in a given file range
* Does not check pagecache
*/
2022-10-11 11:32:14 +03:00
static int quota_reserve_range ( struct bch_inode_info * inode ,
struct quota_res * res ,
u64 start , u64 end )
{
struct bch_fs * c = inode - > v . i_sb - > s_fs_info ;
struct btree_trans trans ;
struct btree_iter iter ;
struct bkey_s_c k ;
u32 snapshot ;
u64 sectors = end - start ;
u64 pos = start ;
int ret ;
bch2_trans_init ( & trans , c , 0 , 0 ) ;
retry :
bch2_trans_begin ( & trans ) ;
ret = bch2_subvolume_get_snapshot ( & trans , inode - > ei_subvol , & snapshot ) ;
if ( ret )
goto err ;
bch2_trans_iter_init ( & trans , & iter , BTREE_ID_extents ,
SPOS ( inode - > v . i_ino , pos , snapshot ) , 0 ) ;
while ( ! ( ret = btree_trans_too_many_iters ( & trans ) ) & &
( k = bch2_btree_iter_peek_upto ( & iter , POS ( inode - > v . i_ino , end - 1 ) ) ) . k & &
! ( ret = bkey_err ( k ) ) ) {
if ( bkey_extent_is_allocation ( k . k ) ) {
u64 s = min ( end , k . k - > p . offset ) -
max ( start , bkey_start_offset ( k . k ) ) ;
BUG_ON ( s > sectors ) ;
sectors - = s ;
}
bch2_btree_iter_advance ( & iter ) ;
}
pos = iter . pos . offset ;
bch2_trans_iter_exit ( & trans , & iter ) ;
err :
if ( bch2_err_matches ( ret , BCH_ERR_transaction_restart ) )
goto retry ;
bch2_trans_exit ( & trans ) ;
if ( ret )
return ret ;
return bch2_quota_reservation_add ( c , inode , res , sectors , true ) ;
}
2019-08-16 16:59:56 +03:00
/*
 * Reflink @len bytes from @file_src at @pos_src into @file_dst at @pos_dst.
 *
 * Both positions must be block aligned, dedup is not supported, and a remap
 * within a single file must not overlap itself.
 *
 * Returns the number of bytes remapped (never more than @len after
 * generic_remap_file_range_prep() has adjusted it), or a negative error
 * code. When the destination requires synchronous writes the inode is
 * flushed first; a successful flush leaves the byte count untouched, a
 * failed flush is returned in its place.
 */
loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src,
			     struct file *file_dst, loff_t pos_dst,
			     loff_t len, unsigned remap_flags)
{
	struct bch_inode_info *src = file_bch_inode(file_src);
	struct bch_inode_info *dst = file_bch_inode(file_dst);
	struct bch_fs *c = src->v.i_sb->s_fs_info;
	struct quota_res quota_res = { 0 };
	s64 i_sectors_delta = 0;
	u64 aligned_len;
	loff_t ret = 0;

	if (remap_flags & ~(REMAP_FILE_DEDUP|REMAP_FILE_ADVISORY))
		return -EINVAL;

	if (remap_flags & REMAP_FILE_DEDUP)
		return -EOPNOTSUPP;

	if ((pos_src & (block_bytes(c) - 1)) ||
	    (pos_dst & (block_bytes(c) - 1)))
		return -EINVAL;

	/* source and destination ranges within one file may not overlap: */
	if (src == dst &&
	    abs(pos_src - pos_dst) < len)
		return -EINVAL;

	bch2_lock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst);

	inode_dio_wait(&src->v);
	inode_dio_wait(&dst->v);

	ret = generic_remap_file_range_prep(file_src, pos_src,
					    file_dst, pos_dst,
					    &len, remap_flags);
	if (ret < 0 || len == 0)
		goto err;

	aligned_len = round_up((u64) len, block_bytes(c));

	ret = write_invalidate_inode_pages_range(dst->v.i_mapping,
				pos_dst, pos_dst + len - 1);
	if (ret)
		goto err;

	ret = quota_reserve_range(dst, &quota_res, pos_dst >> 9,
				  (pos_dst + aligned_len) >> 9);
	if (ret)
		goto err;

	file_update_time(file_dst);

	mark_pagecache_unallocated(src, pos_src >> 9,
				   (pos_src + aligned_len) >> 9);

	ret = bch2_remap_range(c,
			       inode_inum(dst), pos_dst >> 9,
			       inode_inum(src), pos_src >> 9,
			       aligned_len >> 9,
			       pos_dst + len, &i_sectors_delta);
	if (ret < 0)
		goto err;

	/*
	 * due to alignment, we might have remapped slightly more than requested
	 */
	ret = min((u64) ret << 9, (u64) len);

	i_sectors_acct(c, dst, &quota_res, i_sectors_delta);

	spin_lock(&dst->v.i_lock);
	if (pos_dst + ret > dst->v.i_size)
		i_size_write(&dst->v, pos_dst + ret);
	spin_unlock(&dst->v.i_lock);

	if ((file_dst->f_flags & (__O_SYNC | O_DSYNC)) ||
	    IS_SYNC(file_inode(file_dst))) {
		int flush_ret = bch2_flush_inode(c, inode_inum(dst));

		/*
		 * Only a failed flush may replace the return value: storing a
		 * successful (zero) result in ret would report that nothing
		 * had been remapped.
		 */
		if (flush_ret)
			ret = flush_ret;
	}
err:
	bch2_quota_reservation_put(c, dst, &quota_res);
	bch2_unlock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst);

	return bch2_err_class(ret);
}
2017-03-17 09:18:50 +03:00
/* fseek: */
2019-07-30 20:49:17 +03:00
static int folio_data_offset ( struct folio * folio , unsigned offset )
2017-03-17 09:18:50 +03:00
{
2019-07-03 04:41:35 +03:00
struct bch_page_state * s = bch2_page_state ( & folio - > page ) ;
unsigned i ;
2019-07-30 20:49:17 +03:00
if ( s )
for ( i = offset > > 9 ; i < PAGE_SECTORS ; i + + )
if ( s - > s [ i ] . state > = SECTOR_DIRTY )
return i < < 9 ;
2018-11-15 05:53:40 +03:00
2019-07-30 20:49:17 +03:00
return - 1 ;
2017-03-17 09:18:50 +03:00
}
2019-07-30 20:49:17 +03:00
/*
 * Search the page cache of @vinode for data in the byte range
 * [@start_offset, @end_offset].
 *
 * Returns the offset of the first sector that folio_data_offset() reports,
 * clamped to the range, or @end_offset if none is found.
 */
static loff_t bch2_seek_pagecache_data(struct inode *vinode,
				       loff_t start_offset,
				       loff_t end_offset)
{
	struct folio_batch fbatch;
	pgoff_t start_index	= start_offset >> PAGE_SHIFT;
	pgoff_t end_index	= end_offset >> PAGE_SHIFT;
	pgoff_t index		= start_index;
	unsigned i;
	loff_t ret;
	int offset;

	folio_batch_init(&fbatch);

	while (filemap_get_folios(vinode->i_mapping,
				  &index, end_index, &fbatch)) {
		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			struct folio *folio = fbatch.folios[i];
			unsigned pg_start;

			folio_lock(folio);

			/* only the first page is searched from mid-page: */
			if (folio->index == start_index)
				pg_start = start_offset & (PAGE_SIZE - 1);
			else
				pg_start = 0;

			offset = folio_data_offset(folio, pg_start);
			if (offset < 0) {
				folio_unlock(folio);
				continue;
			}

			ret = clamp(((loff_t) folio->index << PAGE_SHIFT) +
				    offset,
				    start_offset, end_offset);
			folio_unlock(folio);
			folio_batch_release(&fbatch);
			return ret;
		}
		folio_batch_release(&fbatch);
		cond_resched();
	}

	return end_offset;
}
/*
 * SEEK_DATA: position @file at the first data at or after @offset.
 *
 * The extents btree is consulted first; the page cache is then searched up
 * to the position found there.
 *
 * Returns the new file position, -ENXIO if @offset is at or past EOF or
 * there is no data before EOF, or another negative error code.
 */
static loff_t bch2_seek_data(struct file *file, u64 offset)
{
	struct bch_inode_info *inode = file_bch_inode(file);
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	struct btree_trans trans;
	struct btree_iter iter;
	struct bkey_s_c k;
	subvol_inum inum = inode_inum(inode);
	u64 isize, next_data = MAX_LFS_FILESIZE;
	u32 snapshot;
	int ret;

	isize = i_size_read(&inode->v);
	if (offset >= isize)
		return -ENXIO;

	bch2_trans_init(&trans, c, 0, 0);
retry:
	bch2_trans_begin(&trans);

	ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot);
	if (ret)
		goto err;

	for_each_btree_key_upto_norestart(&trans, iter, BTREE_ID_extents,
			   SPOS(inode->v.i_ino, offset >> 9, snapshot),
			   POS(inode->v.i_ino, U64_MAX),
			   0, k, ret) {
		if (bkey_extent_is_data(k.k)) {
			next_data = max(offset, bkey_start_offset(k.k) << 9);
			break;
		} else if (k.k->p.offset << 9 > isize) {
			/*
			 * Key offsets are in 512 byte sectors, isize is in
			 * bytes: once a key ends past EOF there is nothing
			 * further worth looking at.
			 */
			break;
		}
	}
	bch2_trans_iter_exit(&trans, &iter);
err:
	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
		goto retry;

	bch2_trans_exit(&trans);
	if (ret)
		return ret;

	/* the page cache may hold data before the position found above: */
	if (next_data > offset)
		next_data = bch2_seek_pagecache_data(&inode->v,
						     offset, next_data);

	if (next_data >= isize)
		return -ENXIO;

	return vfs_setpos(file, next_data, MAX_LFS_FILESIZE);
}
2019-07-30 20:49:17 +03:00
static int __page_hole_offset ( struct page * page , unsigned offset )
2017-03-17 09:18:50 +03:00
{
2019-07-30 20:49:17 +03:00
struct bch_page_state * s = bch2_page_state ( page ) ;
unsigned i ;
if ( ! s )
return 0 ;
for ( i = offset > > 9 ; i < PAGE_SECTORS ; i + + )
if ( s - > s [ i ] . state < SECTOR_DIRTY )
return i < < 9 ;
return - 1 ;
}
/*
 * Look for a hole in the page cache page containing byte @offset.
 *
 * Returns @offset itself if no page is cached there, the file offset of the
 * hole reported by __page_hole_offset() if there is one, or -1 if the page
 * has no hole at or after @offset.
 */
static loff_t page_hole_offset(struct address_space *mapping, loff_t offset)
{
	pgoff_t index = offset >> PAGE_SHIFT;
	struct page *page;
	int pg_offset;
	loff_t ret = -1;

	page = find_lock_page(mapping, index);
	if (!page)
		return offset;

	pg_offset = __page_hole_offset(page, offset & (PAGE_SIZE - 1));
	if (pg_offset >= 0)
		ret = ((loff_t) index << PAGE_SHIFT) + pg_offset;

	unlock_page(page);
	/*
	 * find_lock_page() also took a reference on the page; drop it, or the
	 * page refcount leaks on every lookup.
	 */
	put_page(page);

	return ret;
}
2019-07-30 20:49:17 +03:00
/*
 * Search the page cache of @vinode, one page at a time, for a hole in the
 * byte range [@start_offset, @end_offset].
 *
 * Returns the offset of the first hole found (never less than
 * @start_offset), or @end_offset if there is none.
 */
static loff_t bch2_seek_pagecache_hole(struct inode *vinode,
				       loff_t start_offset,
				       loff_t end_offset)
{
	struct address_space *mapping = vinode->i_mapping;
	loff_t pos, found;

	/* after the first page, continue from the start of the next one */
	for (pos = start_offset;
	     pos < end_offset;
	     pos = (pos + PAGE_SIZE) & PAGE_MASK) {
		found = page_hole_offset(mapping, pos);
		if (found >= 0 && found <= end_offset)
			return max(start_offset, found);
	}

	return end_offset;
}
/*
 * SEEK_HOLE: position @file at the first hole at or after @offset.
 *
 * Extent slots are walked from @offset; slots that do not hold data are
 * additionally checked against the page cache, since a range may be a hole
 * in the btree while the page cache still reports no hole there.
 *
 * Returns the new file position (at most i_size), -ENXIO if @offset is at
 * or past EOF, or another negative error code.
 */
static loff_t bch2_seek_hole(struct file *file, u64 offset)
{
	struct bch_inode_info *inode = file_bch_inode(file);
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	struct btree_trans trans;
	struct btree_iter iter;
	struct bkey_s_c k;
	subvol_inum inum = inode_inum(inode);
	u64 isize, next_hole = MAX_LFS_FILESIZE;
	u32 snapshot;
	int ret;

	isize = i_size_read(&inode->v);
	if (offset >= isize)
		return -ENXIO;

	bch2_trans_init(&trans, c, 0, 0);
retry:
	bch2_trans_begin(&trans);

	ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot);
	if (ret)
		goto err;

	for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents,
			   SPOS(inode->v.i_ino, offset >> 9, snapshot),
			   BTREE_ITER_SLOTS, k, ret) {
		if (k.k->p.inode != inode->v.i_ino) {
			/* walked off the end of this inode's extents */
			next_hole = bch2_seek_pagecache_hole(&inode->v,
					offset, MAX_LFS_FILESIZE);
			break;
		} else if (!bkey_extent_is_data(k.k)) {
			/* no data in the btree here: check the page cache */
			next_hole = bch2_seek_pagecache_hole(&inode->v,
					max(offset, bkey_start_offset(k.k) << 9),
					k.k->p.offset << 9);

			if (next_hole < k.k->p.offset << 9)
				break;
		} else {
			offset = max(offset, bkey_start_offset(k.k) << 9);
		}
	}
	bch2_trans_iter_exit(&trans, &iter);
err:
	if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
		goto retry;

	bch2_trans_exit(&trans);
	if (ret)
		return ret;

	/* the result is capped at i_size */
	next_hole = min(next_hole, isize);

	return vfs_setpos(file, next_hole, MAX_LFS_FILESIZE);
}
/*
 * llseek entry point: SEEK_SET, SEEK_CUR and SEEK_END go to
 * generic_file_llseek(); SEEK_DATA and SEEK_HOLE have their own handlers.
 *
 * Returns the new file position, or a negative error code (-EINVAL for an
 * unknown @whence).
 */
loff_t bch2_llseek(struct file *file, loff_t offset, int whence)
{
	switch (whence) {
	case SEEK_SET:
	case SEEK_CUR:
	case SEEK_END:
		return bch2_err_class(generic_file_llseek(file, offset, whence));
	case SEEK_DATA:
		return bch2_err_class(bch2_seek_data(file, offset));
	case SEEK_HOLE:
		return bch2_err_class(bch2_seek_hole(file, offset));
	default:
		return bch2_err_class(-EINVAL);
	}
}
void bch2_fs_fsio_exit ( struct bch_fs * c )
{
bioset_exit ( & c - > dio_write_bioset ) ;
bioset_exit ( & c - > dio_read_bioset ) ;
bioset_exit ( & c - > writepage_bioset ) ;
}
int bch2_fs_fsio_init ( struct bch_fs * c )
{
int ret = 0 ;
pr_verbose_init ( c - > opts , " " ) ;
if ( bioset_init ( & c - > writepage_bioset ,
2019-10-09 19:50:39 +03:00
4 , offsetof ( struct bch_writepage_io , op . wbio . bio ) ,
2017-03-17 09:18:50 +03:00
BIOSET_NEED_BVECS ) | |
bioset_init ( & c - > dio_read_bioset ,
4 , offsetof ( struct dio_read , rbio . bio ) ,
BIOSET_NEED_BVECS ) | |
bioset_init ( & c - > dio_write_bioset ,
2019-10-09 19:50:39 +03:00
4 , offsetof ( struct dio_write , op . wbio . bio ) ,
2017-03-17 09:18:50 +03:00
BIOSET_NEED_BVECS ) )
ret = - ENOMEM ;
pr_verbose_init ( c - > opts , " ret %i " , ret ) ;
return ret ;
}
# endif /* NO_BCACHEFS_FS */