2017-03-17 09:18:50 +03:00
// SPDX-License-Identifier: GPL-2.0
# ifndef NO_BCACHEFS_FS
# include "bcachefs.h"
2018-10-06 07:46:55 +03:00
# include "alloc_foreground.h"
2020-12-17 23:08:58 +03:00
# include "bkey_buf.h"
2017-03-17 09:18:50 +03:00
# include "btree_update.h"
# include "buckets.h"
# include "clock.h"
# include "error.h"
2018-08-06 00:46:41 +03:00
# include "extents.h"
2019-11-15 23:52:28 +03:00
# include "extent_update.h"
2017-03-17 09:18:50 +03:00
# include "fs.h"
# include "fs-io.h"
# include "fsck.h"
# include "inode.h"
# include "journal.h"
# include "io.h"
# include "keylist.h"
# include "quota.h"
2019-08-16 16:59:56 +03:00
# include "reflink.h"
2017-03-17 09:18:50 +03:00
# include "trace.h"
# include <linux/aio.h>
# include <linux/backing-dev.h>
# include <linux/falloc.h>
# include <linux/migrate.h>
# include <linux/mmu_context.h>
# include <linux/pagevec.h>
2020-10-09 07:09:20 +03:00
# include <linux/rmap.h>
2017-03-17 09:18:50 +03:00
# include <linux/sched/signal.h>
# include <linux/task_io_accounting_ops.h>
# include <linux/uio.h>
# include <linux/writeback.h>
# include <trace/events/writeback.h>
2019-07-29 19:24:36 +03:00
static inline bool bio_full ( struct bio * bio , unsigned len )
{
if ( bio - > bi_vcnt > = bio - > bi_max_vecs )
return true ;
if ( bio - > bi_iter . bi_size > UINT_MAX - len )
return true ;
return false ;
}
2020-11-11 20:33:12 +03:00
/*
 * Return current->faults_disabled_mapping with bit 0 masked off; bit 0 is
 * used as a flag by set_fdm_dropped_locks() / fdm_dropped_locks().
 */
static inline struct address_space *faults_disabled_mapping(void)
{
	unsigned long raw = (unsigned long) current->faults_disabled_mapping;

	return (void *) (raw & ~1UL);
}
/* Set bit 0 of current->faults_disabled_mapping, keeping the other bits. */
static inline void set_fdm_dropped_locks(void)
{
	unsigned long raw = (unsigned long) current->faults_disabled_mapping;

	current->faults_disabled_mapping = (void *) (raw | 1);
}
/* Test bit 0 of current->faults_disabled_mapping. */
static inline bool fdm_dropped_locks(void)
{
	unsigned long raw = (unsigned long) current->faults_disabled_mapping;

	return raw & 1;
}
2017-03-17 09:18:50 +03:00
/* A quota reservation: the number of sectors currently held. */
struct quota_res {
	u64			sectors;
};
struct bch_writepage_io {
2019-10-09 19:50:39 +03:00
struct bch_inode_info * inode ;
2017-03-17 09:18:50 +03:00
/* must be last: */
2019-10-09 19:50:39 +03:00
struct bch_write_op op ;
2017-03-17 09:18:50 +03:00
} ;
struct dio_write {
2019-11-02 04:16:51 +03:00
struct completion done ;
2017-03-17 09:18:50 +03:00
struct kiocb * req ;
2019-01-14 05:36:14 +03:00
struct mm_struct * mm ;
2017-03-17 09:18:50 +03:00
unsigned loop : 1 ,
sync : 1 ,
free_iov : 1 ;
struct quota_res quota_res ;
2020-06-30 01:22:06 +03:00
u64 written ;
2017-03-17 09:18:50 +03:00
struct iov_iter iter ;
struct iovec inline_vecs [ 2 ] ;
/* must be last: */
2019-10-09 19:50:39 +03:00
struct bch_write_op op ;
2017-03-17 09:18:50 +03:00
} ;
struct dio_read {
struct closure cl ;
struct kiocb * req ;
long ret ;
2021-01-21 22:42:23 +03:00
bool should_dirty ;
2017-03-17 09:18:50 +03:00
struct bch_read_bio rbio ;
} ;
/* pagecache_block must be held */
2022-11-02 23:45:28 +03:00
static noinline int write_invalidate_inode_pages_range ( struct address_space * mapping ,
2017-03-17 09:18:50 +03:00
loff_t start , loff_t end )
{
int ret ;
/*
* XXX : the way this is currently implemented , we can spin if a process
* is continually redirtying a specific page
*/
do {
if ( ! mapping - > nrpages )
return 0 ;
ret = filemap_write_and_wait_range ( mapping , start , end ) ;
if ( ret )
break ;
if ( ! mapping - > nrpages )
return 0 ;
ret = invalidate_inode_pages2_range ( mapping ,
start > > PAGE_SHIFT ,
end > > PAGE_SHIFT ) ;
} while ( ret = = - EBUSY ) ;
return ret ;
}
/* quotas */
# ifdef CONFIG_BCACHEFS_QUOTA
static void bch2_quota_reservation_put ( struct bch_fs * c ,
struct bch_inode_info * inode ,
struct quota_res * res )
{
if ( ! res - > sectors )
return ;
mutex_lock ( & inode - > ei_quota_lock ) ;
BUG_ON ( res - > sectors > inode - > ei_quota_reserved ) ;
bch2_quota_acct ( c , inode - > ei_qid , Q_SPC ,
2018-11-01 22:10:01 +03:00
- ( ( s64 ) res - > sectors ) , KEY_TYPE_QUOTA_PREALLOC ) ;
2017-03-17 09:18:50 +03:00
inode - > ei_quota_reserved - = res - > sectors ;
mutex_unlock ( & inode - > ei_quota_lock ) ;
res - > sectors = 0 ;
}
/*
 * Add @sectors to the quota reservation @res for @inode.
 *
 * The accounting mode is KEY_TYPE_QUOTA_PREALLOC when @check_enospc is set
 * and KEY_TYPE_QUOTA_NOCHECK otherwise.  On success both
 * inode->ei_quota_reserved and res->sectors grow by @sectors.
 *
 * Returns the result of bch2_quota_acct().
 */
static int bch2_quota_reservation_add(struct bch_fs *c,
				      struct bch_inode_info *inode,
				      struct quota_res *res,
				      unsigned sectors,
				      bool check_enospc)
{
	int ret;

	mutex_lock(&inode->ei_quota_lock);

	ret = bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors,
			      check_enospc ? KEY_TYPE_QUOTA_PREALLOC : KEY_TYPE_QUOTA_NOCHECK);
	if (unlikely(ret))
		goto out_unlock;

	inode->ei_quota_reserved += sectors;
	res->sectors += sectors;
out_unlock:
	mutex_unlock(&inode->ei_quota_lock);

	return ret;
}
# else
/* Stub used when CONFIG_BCACHEFS_QUOTA is not defined: nothing to release. */
static void bch2_quota_reservation_put(struct bch_fs *c,
				       struct bch_inode_info *inode,
				       struct quota_res *res)
{
	/* intentionally empty */
}
/* Stub used when CONFIG_BCACHEFS_QUOTA is not defined: always succeeds. */
static int bch2_quota_reservation_add(struct bch_fs *c,
				      struct bch_inode_info *inode,
				      struct quota_res *res,
				      unsigned sectors,
				      bool check_enospc)
{
	/* no quota accounting to do */
	return 0;
}
# endif
/* i_size updates: */
2018-07-17 21:12:42 +03:00
/* Argument bundle handed to inode_set_size() through bch2_write_inode(). */
struct inode_new_size {
	loff_t			new_size;	/* value stored into bi_size */
	u64			now;		/* timestamp used for the fields below */
	unsigned		fields;		/* ATTR_ATIME / ATTR_MTIME / ATTR_CTIME mask */
};
2017-03-17 09:18:50 +03:00
static int inode_set_size ( struct bch_inode_info * inode ,
struct bch_inode_unpacked * bi ,
void * p )
{
2018-07-17 21:12:42 +03:00
struct inode_new_size * s = p ;
2017-03-17 09:18:50 +03:00
2018-07-17 21:12:42 +03:00
bi - > bi_size = s - > new_size ;
if ( s - > fields & ATTR_ATIME )
bi - > bi_atime = s - > now ;
if ( s - > fields & ATTR_MTIME )
bi - > bi_mtime = s - > now ;
if ( s - > fields & ATTR_CTIME )
bi - > bi_ctime = s - > now ;
2017-03-17 09:18:50 +03:00
return 0 ;
}
2019-08-16 16:59:56 +03:00
/*
 * Write @new_size as the inode's size via bch2_write_inode(), also updating
 * the timestamps named in @fields to the current filesystem time.
 *
 * Returns the result of bch2_write_inode().
 */
int __must_check bch2_write_inode_size(struct bch_fs *c,
				       struct bch_inode_info *inode,
				       loff_t new_size, unsigned fields)
{
	struct inode_new_size s;

	s.new_size	= new_size;
	s.now		= bch2_current_time(c);
	s.fields	= fields;

	return bch2_write_inode(c, inode, inode_set_size, &s, fields);
}
/*
 * Adjust the inode's i_blocks by @sectors (which may be negative) under
 * ei_quota_lock.
 *
 * With CONFIG_BCACHEFS_QUOTA, a positive delta that comes with a @quota_res
 * is taken out of that reservation (and out of ei_quota_reserved); any other
 * delta is charged through bch2_quota_acct() with KEY_TYPE_QUOTA_WARN.
 */
static void i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode,
			   struct quota_res *quota_res, s64 sectors)
{
	if (sectors == 0)
		return;

	mutex_lock(&inode->ei_quota_lock);

	/* the block count must never go negative */
	BUG_ON((s64) inode->v.i_blocks + sectors < 0);
	inode->v.i_blocks += sectors;

#ifdef CONFIG_BCACHEFS_QUOTA
	if (!quota_res || sectors <= 0) {
		bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, KEY_TYPE_QUOTA_WARN);
	} else {
		BUG_ON(sectors > quota_res->sectors);
		BUG_ON(sectors > inode->ei_quota_reserved);

		quota_res->sectors -= sectors;
		inode->ei_quota_reserved -= sectors;
	}
#endif
	mutex_unlock(&inode->ei_quota_lock);
}
/* page state: */
/* stored in page->private: */
2019-07-03 04:41:35 +03:00
/* Per-sector bookkeeping kept inside struct bch_page_state. */
struct bch_page_sector {
	/* Uncompressed, fully allocated replicas (or on disk reservation): */
	unsigned		nr_replicas:4;

	/* Owns PAGE_SECTORS * replicas_reserved sized in memory reservation: */
	unsigned		replicas_reserved:4;

	/* i_sectors: */
	enum {
		SECTOR_UNALLOCATED,
		SECTOR_RESERVED,
		SECTOR_DIRTY,
		SECTOR_DIRTY_RESERVED,
		SECTOR_ALLOCATED,
	}			state:8;
};
2017-03-17 09:18:50 +03:00
2019-07-03 04:41:35 +03:00
struct bch_page_state {
2019-10-09 16:19:06 +03:00
spinlock_t lock ;
2019-07-29 19:24:36 +03:00
atomic_t write_count ;
2021-11-24 02:17:04 +03:00
bool uptodate ;
2019-07-03 04:41:35 +03:00
struct bch_page_sector s [ PAGE_SECTORS ] ;
2017-03-17 09:18:50 +03:00
} ;
2019-07-03 04:41:35 +03:00
static inline struct bch_page_state * __bch2_page_state ( struct page * page )
2017-03-17 09:18:50 +03:00
{
2019-07-03 04:41:35 +03:00
return page_has_private ( page )
? ( struct bch_page_state * ) page_private ( page )
: NULL ;
}
2017-03-17 09:18:50 +03:00
2019-07-03 04:41:35 +03:00
/*
 * Same lookup as __bch2_page_state(), with a debug assertion (EBUG_ON) that
 * @page is locked.
 */
static inline struct bch_page_state *bch2_page_state(struct page *page)
{
	struct bch_page_state *s;

	EBUG_ON(!PageLocked(page));

	s = __bch2_page_state(page);
	return s;
}
/*
 * Detach @page's private data and free it.
 *
 * for newly allocated pages (no page-locked assertion is made here):
 */
static void __bch2_page_state_release(struct page *page)
{
	void *priv = detach_page_private(page);

	kfree(priv);
}
/*
 * Detach and free @page's private state; debug-asserts (EBUG_ON) that the
 * page is locked.
 */
static void bch2_page_state_release(struct page *page)
{
	EBUG_ON(!PageLocked(page));

	/* same work as __bch2_page_state_release() */
	kfree(detach_page_private(page));
}
/*
 * Allocate a zeroed bch_page_state with GFP_NOFS | @gfp, initialize its lock
 * and attach it to @page as private data.
 *
 * for newly allocated pages (does not look for existing state first):
 *
 * Returns the new state, or NULL if the allocation failed.
 */
static struct bch_page_state *__bch2_page_state_create(struct page *page,
						       gfp_t gfp)
{
	struct bch_page_state *s = kzalloc(sizeof(*s), GFP_NOFS|gfp);

	if (s) {
		spin_lock_init(&s->lock);
		attach_page_private(page, s);
	}

	return s;
}
2019-07-03 04:41:35 +03:00
/*
 * Return @page's existing state if it has one, otherwise create and attach a
 * new one.  May return NULL when a new allocation fails.
 */
static struct bch_page_state *bch2_page_state_create(struct page *page,
						     gfp_t gfp)
{
	struct bch_page_state *existing = bch2_page_state(page);

	if (existing)
		return existing;

	return __bch2_page_state_create(page, gfp);
}
2021-11-24 01:05:56 +03:00
static unsigned bkey_to_sector_state ( const struct bkey * k )
{
if ( k - > type = = KEY_TYPE_reservation )
return SECTOR_RESERVED ;
if ( bkey_extent_is_allocation ( k ) )
return SECTOR_ALLOCATED ;
return SECTOR_UNALLOCATED ;
}
2021-11-24 02:17:04 +03:00
static void __bch2_page_state_set ( struct page * page ,
unsigned pg_offset , unsigned pg_len ,
unsigned nr_ptrs , unsigned state )
{
struct bch_page_state * s = bch2_page_state_create ( page , __GFP_NOFAIL ) ;
unsigned i ;
BUG_ON ( pg_offset > = PAGE_SECTORS ) ;
BUG_ON ( pg_offset + pg_len > PAGE_SECTORS ) ;
spin_lock ( & s - > lock ) ;
for ( i = pg_offset ; i < pg_offset + pg_len ; i + + ) {
s - > s [ i ] . nr_replicas = nr_ptrs ;
s - > s [ i ] . state = state ;
}
if ( i = = PAGE_SECTORS )
s - > uptodate = true ;
spin_unlock ( & s - > lock ) ;
}
/*
 * Initialize the per-sector state of @pages from the extents btree.
 *
 * Walks the extents of @inum starting at the first sector of pages[0].  For
 * each key, every page it overlaps whose state is not yet marked uptodate
 * gets the key's fully-allocated pointer count and derived sector state
 * recorded for the overlapping sectors.  When the walk returns -EINTR it is
 * restarted from the iterator's last position.
 *
 * Returns 0 on success, or the error from the snapshot lookup or btree walk.
 */
static int bch2_page_state_set(struct bch_fs *c, subvol_inum inum,
			       struct page **pages, unsigned nr_pages)
{
	struct btree_trans trans;
	struct btree_iter iter;
	struct bkey_s_c k;
	/*
	 * Widen before shifting: page->index is a pgoff_t (unsigned long), so
	 * on 32-bit the shift would otherwise be done in 32 bits and could
	 * overflow before being stored in a u64.
	 */
	u64 offset = (u64) pages[0]->index << PAGE_SECTORS_SHIFT;
	unsigned pg_idx = 0;
	u32 snapshot;
	int ret;

	bch2_trans_init(&trans, c, 0, 0);
retry:
	bch2_trans_begin(&trans);

	ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot);
	if (ret)
		goto err;

	for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents,
			   SPOS(inum.inum, offset, snapshot),
			   BTREE_ITER_SLOTS, k, ret) {
		unsigned nr_ptrs = bch2_bkey_nr_ptrs_fully_allocated(k);
		unsigned state = bkey_to_sector_state(k.k);

		while (pg_idx < nr_pages) {
			struct page *page = pages[pg_idx];
			/* page bounds in sectors, computed in 64 bits (see above) */
			u64 pg_start = (u64) page->index << PAGE_SECTORS_SHIFT;
			u64 pg_end = ((u64) page->index + 1) << PAGE_SECTORS_SHIFT;
			/* portion of this key that falls inside the page */
			unsigned pg_offset = max(bkey_start_offset(k.k), pg_start) - pg_start;
			unsigned pg_len = min(k.k->p.offset, pg_end) - pg_offset - pg_start;

			BUG_ON(k.k->p.offset < pg_start);
			BUG_ON(bkey_start_offset(k.k) > pg_end);

			if (!bch2_page_state_create(page, __GFP_NOFAIL)->uptodate)
				__bch2_page_state_set(page, pg_offset, pg_len, nr_ptrs, state);

			/* key ends inside this page: need the next key */
			if (k.k->p.offset < pg_end)
				break;
			pg_idx++;
		}

		if (pg_idx == nr_pages)
			break;
	}

	/* remember how far we got, so a retry resumes from here */
	offset = iter.pos.offset;
	bch2_trans_iter_exit(&trans, &iter);
err:
	if (ret == -EINTR)
		goto retry;
	bch2_trans_exit(&trans);

	return ret;
}
2021-11-24 01:05:56 +03:00
static void bch2_bio_page_state_set ( struct bio * bio , struct bkey_s_c k )
{
struct bvec_iter iter ;
struct bio_vec bv ;
unsigned nr_ptrs = k . k - > type = = KEY_TYPE_reflink_v
? 0 : bch2_bkey_nr_ptrs_fully_allocated ( k ) ;
unsigned state = bkey_to_sector_state ( k . k ) ;
2021-11-24 02:17:04 +03:00
bio_for_each_segment ( bv , bio , iter )
__bch2_page_state_set ( bv . bv_page , bv . bv_offset > > 9 ,
bv . bv_len > > 9 , nr_ptrs , state ) ;
2021-11-24 01:05:56 +03:00
}
2021-11-24 02:21:09 +03:00
/*
 * For every folio of @inode's mapping that overlaps the sector range
 * [@start, @end) and has page state attached, reset nr_replicas to 0 for the
 * overlapping sectors.  Folios without state are left alone.
 */
static void mark_pagecache_unallocated(struct bch_inode_info *inode,
				       u64 start, u64 end)
{
	pgoff_t index = start >> PAGE_SECTORS_SHIFT;
	pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT;
	struct folio_batch fbatch;
	unsigned i, j;

	if (end <= start)
		return;

	folio_batch_init(&fbatch);

	while (filemap_get_folios(inode->v.i_mapping,
				  &index, end_index, &fbatch)) {
		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			struct folio *folio = fbatch.folios[i];
			/*
			 * Widen before shifting: folio->index is a pgoff_t
			 * (unsigned long), so on 32-bit the shift could
			 * overflow before being stored in a u64.
			 */
			u64 pg_start = (u64) folio->index << PAGE_SECTORS_SHIFT;
			u64 pg_end = ((u64) folio->index + 1) << PAGE_SECTORS_SHIFT;
			/* part of [start, end) that lies inside this page */
			unsigned pg_offset = max(start, pg_start) - pg_start;
			unsigned pg_len = min(end, pg_end) - pg_offset - pg_start;
			struct bch_page_state *s;

			BUG_ON(end <= pg_start);
			BUG_ON(pg_offset >= PAGE_SECTORS);
			BUG_ON(pg_offset + pg_len > PAGE_SECTORS);

			folio_lock(folio);
			s = bch2_page_state(&folio->page);

			if (s) {
				spin_lock(&s->lock);
				for (j = pg_offset; j < pg_offset + pg_len; j++)
					s->s[j].nr_replicas = 0;
				spin_unlock(&s->lock);
			}

			folio_unlock(folio);
		}
		folio_batch_release(&fbatch);
		cond_resched();
	}
}
/*
 * For every folio of @inode's mapping that overlaps the sector range
 * [@start, @end) and has page state attached, move the overlapping sectors to
 * their "reserved" variant: SECTOR_UNALLOCATED becomes SECTOR_RESERVED and
 * SECTOR_DIRTY becomes SECTOR_DIRTY_RESERVED.  Each DIRTY -> DIRTY_RESERVED
 * transition subtracts one sector from the inode's block count, applied in a
 * single i_sectors_acct() call at the end.
 */
static void mark_pagecache_reserved(struct bch_inode_info *inode,
				    u64 start, u64 end)
{
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	pgoff_t index = start >> PAGE_SECTORS_SHIFT;
	pgoff_t end_index = (end - 1) >> PAGE_SECTORS_SHIFT;
	struct folio_batch fbatch;
	s64 i_sectors_delta = 0;
	unsigned i, j;

	if (end <= start)
		return;

	folio_batch_init(&fbatch);

	while (filemap_get_folios(inode->v.i_mapping,
				  &index, end_index, &fbatch)) {
		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			struct folio *folio = fbatch.folios[i];
			/*
			 * Widen before shifting: folio->index is a pgoff_t
			 * (unsigned long), so on 32-bit the shift could
			 * overflow before being stored in a u64.
			 */
			u64 pg_start = (u64) folio->index << PAGE_SECTORS_SHIFT;
			u64 pg_end = ((u64) folio->index + 1) << PAGE_SECTORS_SHIFT;
			/* part of [start, end) that lies inside this page */
			unsigned pg_offset = max(start, pg_start) - pg_start;
			unsigned pg_len = min(end, pg_end) - pg_offset - pg_start;
			struct bch_page_state *s;

			BUG_ON(end <= pg_start);
			BUG_ON(pg_offset >= PAGE_SECTORS);
			BUG_ON(pg_offset + pg_len > PAGE_SECTORS);

			folio_lock(folio);
			s = bch2_page_state(&folio->page);

			if (s) {
				spin_lock(&s->lock);
				for (j = pg_offset; j < pg_offset + pg_len; j++) {
					switch (s->s[j].state) {
					case SECTOR_UNALLOCATED:
						s->s[j].state = SECTOR_RESERVED;
						break;
					case SECTOR_DIRTY:
						s->s[j].state = SECTOR_DIRTY_RESERVED;
						i_sectors_delta--;
						break;
					default:
						break;
					}
				}
				spin_unlock(&s->lock);
			}

			folio_unlock(folio);
		}
		folio_batch_release(&fbatch);
		cond_resched();
	}

	i_sectors_acct(c, inode, NULL, i_sectors_delta);
}
2019-07-02 21:59:15 +03:00
static inline unsigned inode_nr_replicas ( struct bch_fs * c , struct bch_inode_info * inode )
{
/* XXX: this should not be open coded */
return inode - > ei_inode . bi_data_replicas
? inode - > ei_inode . bi_data_replicas - 1
: c - > opts . data_replicas ;
}
2019-07-03 04:41:35 +03:00
static inline unsigned sectors_to_reserve ( struct bch_page_sector * s ,
unsigned nr_replicas )
2017-03-17 09:18:50 +03:00
{
2019-07-03 04:41:35 +03:00
return max ( 0 , ( int ) nr_replicas -
s - > nr_replicas -
s - > replicas_reserved ) ;
}
/*
 * Reserve disk space for every sector of @page that is short of the inode's
 * replica count, and record the reserved amounts in each sector's
 * replicas_reserved.
 *
 * When @check_enospc is false the reservation is requested with
 * BCH_DISK_RESERVATION_NOFAIL.
 *
 * Returns 0 on success (including when nothing needed reserving), -ENOMEM if
 * page state could not be allocated, or the error from
 * bch2_disk_reservation_get().
 */
static int bch2_get_page_disk_reservation(struct bch_fs *c,
				struct bch_inode_info *inode,
				struct page *page, bool check_enospc)
{
	struct bch_page_state *s = bch2_page_state_create(page, 0);
	unsigned nr_replicas = inode_nr_replicas(c, inode);
	struct disk_reservation disk_res = { 0 };
	unsigned sector, needed = 0;
	int flags = check_enospc ? 0 : BCH_DISK_RESERVATION_NOFAIL;
	int ret;

	if (!s)
		return -ENOMEM;

	/* first pass: total shortfall over the whole page */
	for (sector = 0; sector < ARRAY_SIZE(s->s); sector++)
		needed += sectors_to_reserve(&s->s[sector], nr_replicas);

	if (needed == 0)
		return 0;

	ret = bch2_disk_reservation_get(c, &disk_res, needed, 1, flags);
	if (unlikely(ret))
		return ret;

	/* second pass: credit each sector with what was reserved for it */
	for (sector = 0; sector < ARRAY_SIZE(s->s); sector++)
		s->s[sector].replicas_reserved +=
			sectors_to_reserve(&s->s[sector], nr_replicas);

	return 0;
}
2019-07-29 20:38:38 +03:00
/* A disk reservation and a quota reservation carried together. */
struct bch2_page_reservation {
	struct disk_reservation	disk;
	struct quota_res	quota;
};
static void bch2_page_reservation_init ( struct bch_fs * c ,
2019-07-03 04:41:35 +03:00
struct bch_inode_info * inode ,
2019-07-29 20:38:38 +03:00
struct bch2_page_reservation * res )
{
memset ( res , 0 , sizeof ( * res ) ) ;
res - > disk . nr_replicas = inode_nr_replicas ( c , inode ) ;
}
static void bch2_page_reservation_put ( struct bch_fs * c ,
struct bch_inode_info * inode ,
struct bch2_page_reservation * res )
{
bch2_disk_reservation_put ( c , & res - > disk ) ;
bch2_quota_reservation_put ( c , inode , & res - > quota ) ;
}
/*
 * Add to @res the disk and quota reservations needed to dirty bytes
 * [@offset, @offset + @len) of @page, with the range widened to filesystem
 * block boundaries.
 *
 * Disk sectors are counted with sectors_to_reserve(); quota sectors are the
 * sectors currently in SECTOR_UNALLOCATED.  If the quota reservation fails
 * after the disk reservation succeeded, the disk sectors just added are given
 * back.  The page state must already be uptodate.
 *
 * Returns 0 on success, -ENOMEM if page state could not be allocated, or the
 * error from the failing reservation call.
 */
static int bch2_page_reservation_get(struct bch_fs *c,
			struct bch_inode_info *inode, struct page *page,
			struct bch2_page_reservation *res,
			unsigned offset, unsigned len, bool check_enospc)
{
	struct bch_page_state *s = bch2_page_state_create(page, 0);
	unsigned first, last, sector;
	unsigned disk_sectors = 0, quota_sectors = 0;
	int ret;

	if (!s)
		return -ENOMEM;

	BUG_ON(!s->uptodate);

	/* block-aligned byte range, converted to 512-byte sectors */
	first	= round_down(offset, block_bytes(c)) >> 9;
	last	= round_up(offset + len, block_bytes(c)) >> 9;

	for (sector = first; sector < last; sector++) {
		disk_sectors += sectors_to_reserve(&s->s[sector],
						   res->disk.nr_replicas);
		quota_sectors += s->s[sector].state == SECTOR_UNALLOCATED;
	}

	if (disk_sectors) {
		ret = bch2_disk_reservation_add(c, &res->disk,
						disk_sectors,
						!check_enospc
						? BCH_DISK_RESERVATION_NOFAIL
						: 0);
		if (unlikely(ret))
			return ret;
	}

	if (!quota_sectors)
		return 0;

	ret = bch2_quota_reservation_add(c, inode, &res->quota,
					 quota_sectors,
					 check_enospc);
	if (unlikely(ret)) {
		/* undo the disk reservation taken above */
		struct disk_reservation tmp = {
			.sectors = disk_sectors
		};

		bch2_disk_reservation_put(c, &tmp);
		res->disk.sectors -= disk_sectors;
		return ret;
	}

	return 0;
}
static void bch2_clear_page_bits ( struct page * page )
{
struct bch_inode_info * inode = to_bch_ei ( page - > mapping - > host ) ;
struct bch_fs * c = inode - > v . i_sb - > s_fs_info ;
2019-07-03 04:41:35 +03:00
struct bch_page_state * s = bch2_page_state ( page ) ;
2019-07-29 20:38:38 +03:00
struct disk_reservation disk_res = { 0 } ;
2019-07-03 04:41:35 +03:00
int i , dirty_sectors = 0 ;
2018-11-15 05:53:40 +03:00
2019-07-03 04:41:35 +03:00
if ( ! s )
2017-03-17 09:18:50 +03:00
return ;
2019-10-09 16:19:06 +03:00
EBUG_ON ( ! PageLocked ( page ) ) ;
EBUG_ON ( PageWriteback ( page ) ) ;
2019-07-03 04:41:35 +03:00
for ( i = 0 ; i < ARRAY_SIZE ( s - > s ) ; i + + ) {
2019-07-29 20:38:38 +03:00
disk_res . sectors + = s - > s [ i ] . replicas_reserved ;
s - > s [ i ] . replicas_reserved = 0 ;
2021-11-24 01:05:56 +03:00
switch ( s - > s [ i ] . state ) {
case SECTOR_DIRTY :
2019-07-03 04:41:35 +03:00
s - > s [ i ] . state = SECTOR_UNALLOCATED ;
2021-11-24 01:05:56 +03:00
- - dirty_sectors ;
break ;
case SECTOR_DIRTY_RESERVED :
s - > s [ i ] . state = SECTOR_RESERVED ;
break ;
default :
break ;
2019-07-03 04:41:35 +03:00
}
}
2017-03-17 09:18:50 +03:00
2019-07-29 20:38:38 +03:00
bch2_disk_reservation_put ( c , & disk_res ) ;
2021-11-24 02:21:09 +03:00
i_sectors_acct ( c , inode , NULL , dirty_sectors ) ;
2017-03-17 09:18:50 +03:00
2019-07-03 04:41:35 +03:00
bch2_page_state_release ( page ) ;
2017-03-17 09:18:50 +03:00
}
2019-07-29 20:38:38 +03:00
static void bch2_set_page_dirty ( struct bch_fs * c ,
struct bch_inode_info * inode , struct page * page ,
struct bch2_page_reservation * res ,
unsigned offset , unsigned len )
2017-03-17 09:18:50 +03:00
{
2019-07-29 20:38:38 +03:00
struct bch_page_state * s = bch2_page_state ( page ) ;
2019-07-03 04:41:35 +03:00
unsigned i , dirty_sectors = 0 ;
2017-03-17 09:18:50 +03:00
2019-09-27 02:09:08 +03:00
WARN_ON ( ( u64 ) page_offset ( page ) + offset + len >
round_up ( ( u64 ) i_size_read ( & inode - > v ) , block_bytes ( c ) ) ) ;
2019-09-24 20:33:11 +03:00
2019-10-09 16:19:06 +03:00
spin_lock ( & s - > lock ) ;
2019-08-22 03:16:42 +03:00
for ( i = round_down ( offset , block_bytes ( c ) ) > > 9 ;
i < round_up ( offset + len , block_bytes ( c ) ) > > 9 ;
2019-07-29 20:38:38 +03:00
i + + ) {
unsigned sectors = sectors_to_reserve ( & s - > s [ i ] ,
res - > disk . nr_replicas ) ;
2017-03-17 09:18:50 +03:00
2019-10-26 01:54:58 +03:00
/*
* This can happen if we race with the error path in
* bch2_writepage_io_done ( ) :
*/
sectors = min_t ( unsigned , sectors , res - > disk . sectors ) ;
2019-07-29 20:38:38 +03:00
s - > s [ i ] . replicas_reserved + = sectors ;
res - > disk . sectors - = sectors ;
2019-07-03 00:25:05 +03:00
2021-11-24 01:05:56 +03:00
switch ( s - > s [ i ] . state ) {
case SECTOR_UNALLOCATED :
s - > s [ i ] . state = SECTOR_DIRTY ;
2019-07-03 04:41:35 +03:00
dirty_sectors + + ;
2021-11-24 01:05:56 +03:00
break ;
case SECTOR_RESERVED :
s - > s [ i ] . state = SECTOR_DIRTY_RESERVED ;
break ;
default :
break ;
}
2019-07-03 04:41:35 +03:00
}
2019-10-09 16:19:06 +03:00
spin_unlock ( & s - > lock ) ;
2021-11-24 02:21:09 +03:00
i_sectors_acct ( c , inode , & res - > quota , dirty_sectors ) ;
2019-07-02 21:59:15 +03:00
2019-07-29 20:38:38 +03:00
if ( ! PageDirty ( page ) )
filemap_dirty_folio ( inode - > v . i_mapping , page_folio ( page ) ) ;
2017-03-17 09:18:50 +03:00
}
vm_fault_t bch2_page_fault ( struct vm_fault * vmf )
{
struct file * file = vmf - > vma - > vm_file ;
2020-11-11 20:33:12 +03:00
struct address_space * mapping = file - > f_mapping ;
struct address_space * fdm = faults_disabled_mapping ( ) ;
2017-03-17 09:18:50 +03:00
struct bch_inode_info * inode = file_bch_inode ( file ) ;
int ret ;
2020-11-11 20:33:12 +03:00
if ( fdm = = mapping )
return VM_FAULT_SIGBUS ;
/* Lock ordering: */
if ( fdm > mapping ) {
struct bch_inode_info * fdm_host = to_bch_ei ( fdm - > host ) ;
if ( bch2_pagecache_add_tryget ( & inode - > ei_pagecache_lock ) )
goto got_lock ;
bch2_pagecache_block_put ( & fdm_host - > ei_pagecache_lock ) ;
bch2_pagecache_add_get ( & inode - > ei_pagecache_lock ) ;
bch2_pagecache_add_put ( & inode - > ei_pagecache_lock ) ;
bch2_pagecache_block_get ( & fdm_host - > ei_pagecache_lock ) ;
/* Signal that lock has been dropped: */
set_fdm_dropped_locks ( ) ;
return VM_FAULT_SIGBUS ;
}
2017-03-17 09:18:50 +03:00
bch2_pagecache_add_get ( & inode - > ei_pagecache_lock ) ;
2020-11-11 20:33:12 +03:00
got_lock :
2017-03-17 09:18:50 +03:00
ret = filemap_fault ( vmf ) ;
bch2_pagecache_add_put ( & inode - > ei_pagecache_lock ) ;
return ret ;
}
vm_fault_t bch2_page_mkwrite ( struct vm_fault * vmf )
{
struct page * page = vmf - > page ;
struct file * file = vmf - > vma - > vm_file ;
struct bch_inode_info * inode = file_bch_inode ( file ) ;
struct address_space * mapping = file - > f_mapping ;
struct bch_fs * c = inode - > v . i_sb - > s_fs_info ;
2019-07-29 20:38:38 +03:00
struct bch2_page_reservation res ;
2019-09-20 01:05:04 +03:00
unsigned len ;
loff_t isize ;
2021-11-24 02:17:04 +03:00
int ret ;
2017-03-17 09:18:50 +03:00
2019-07-29 20:38:38 +03:00
bch2_page_reservation_init ( c , inode , & res ) ;
2017-03-17 09:18:50 +03:00
sb_start_pagefault ( inode - > v . i_sb ) ;
file_update_time ( file ) ;
/*
* Not strictly necessary , but helps avoid dio writes livelocking in
* write_invalidate_inode_pages_range ( ) - can drop this if / when we get
* a write_invalidate_inode_pages_range ( ) that works without dropping
* page lock before invalidating page
*/
bch2_pagecache_add_get ( & inode - > ei_pagecache_lock ) ;
lock_page ( page ) ;
2019-09-20 01:05:04 +03:00
isize = i_size_read ( & inode - > v ) ;
if ( page - > mapping ! = mapping | | page_offset ( page ) > = isize ) {
2017-03-17 09:18:50 +03:00
unlock_page ( page ) ;
ret = VM_FAULT_NOPAGE ;
goto out ;
}
2019-09-25 22:26:14 +03:00
len = min_t ( loff_t , PAGE_SIZE , isize - page_offset ( page ) ) ;
2019-09-20 01:05:04 +03:00
2021-11-24 02:17:04 +03:00
if ( ! bch2_page_state_create ( page , __GFP_NOFAIL ) - > uptodate ) {
if ( bch2_page_state_set ( c , inode_inum ( inode ) , & page , 1 ) ) {
unlock_page ( page ) ;
ret = VM_FAULT_SIGBUS ;
goto out ;
}
}
2019-09-20 01:05:04 +03:00
if ( bch2_page_reservation_get ( c , inode , page , & res , 0 , len , true ) ) {
2017-03-17 09:18:50 +03:00
unlock_page ( page ) ;
ret = VM_FAULT_SIGBUS ;
goto out ;
}
2019-09-20 01:05:04 +03:00
bch2_set_page_dirty ( c , inode , page , & res , 0 , len ) ;
2019-10-19 01:24:26 +03:00
bch2_page_reservation_put ( c , inode , & res ) ;
2017-03-17 09:18:50 +03:00
wait_for_stable_page ( page ) ;
2021-11-24 02:17:04 +03:00
ret = VM_FAULT_LOCKED ;
2017-03-17 09:18:50 +03:00
out :
bch2_pagecache_add_put ( & inode - > ei_pagecache_lock ) ;
sb_end_pagefault ( inode - > v . i_sb ) ;
2019-07-29 20:38:38 +03:00
2017-03-17 09:18:50 +03:00
return ret ;
}
/*
 * Invalidate callback: the folio's state is cleared only when the whole folio
 * is being invalidated (@offset is 0 and @length covers the folio); partial
 * invalidations are ignored.
 */
void bch2_invalidate_folio(struct folio *folio, size_t offset, size_t length)
{
	if (!offset && length >= folio_size(folio))
		bch2_clear_page_bits(&folio->page);
}
/*
 * Release callback: refuse (return false) while the folio is dirty or under
 * writeback; otherwise clear its state and return true.
 */
bool bch2_release_folio(struct folio *folio, gfp_t gfp_mask)
{
	bool busy = folio_test_dirty(folio) || folio_test_writeback(folio);

	if (!busy)
		bch2_clear_page_bits(&folio->page);

	return !busy;
}
/* readpage(s): */
static void bch2_readpages_end_io ( struct bio * bio )
{
struct bvec_iter_all iter ;
struct bio_vec * bv ;
bio_for_each_segment_all ( bv , bio , iter ) {
struct page * page = bv - > bv_page ;
if ( ! bio - > bi_status ) {
SetPageUptodate ( page ) ;
} else {
ClearPageUptodate ( page ) ;
SetPageError ( page ) ;
}
unlock_page ( page ) ;
}
bio_put ( bio ) ;
}
/* Cursor over the pages of one readahead request. */
struct readpages_iter {
	struct address_space	*mapping;
	struct page		**pages;	/* array of nr_pages entries */
	unsigned		nr_pages;
	unsigned		idx;		/* next entry of pages[] to hand out */
	pgoff_t			offset;		/* page index of pages[0] */
};
static int readpages_iter_init ( struct readpages_iter * iter ,
struct readahead_control * ractl )
{
unsigned i , nr_pages = readahead_count ( ractl ) ;
memset ( iter , 0 , sizeof ( * iter ) ) ;
iter - > mapping = ractl - > mapping ;
iter - > offset = readahead_index ( ractl ) ;
iter - > nr_pages = nr_pages ;
iter - > pages = kmalloc_array ( nr_pages , sizeof ( struct page * ) , GFP_NOFS ) ;
if ( ! iter - > pages )
return - ENOMEM ;
2020-11-30 00:00:47 +03:00
nr_pages = __readahead_batch ( ractl , iter - > pages , nr_pages ) ;
2017-03-17 09:18:50 +03:00
for ( i = 0 ; i < nr_pages ; i + + ) {
2019-07-03 04:41:35 +03:00
__bch2_page_state_create ( iter - > pages [ i ] , __GFP_NOFAIL ) ;
2017-03-17 09:18:50 +03:00
put_page ( iter - > pages [ i ] ) ;
}
return 0 ;
}
static inline struct page * readpage_iter_next ( struct readpages_iter * iter )
{
if ( iter - > idx > = iter - > nr_pages )
return NULL ;
EBUG_ON ( iter - > pages [ iter - > idx ] - > index ! = iter - > offset + iter - > idx ) ;
return iter - > pages [ iter - > idx ] ;
}
2019-11-10 00:01:15 +03:00
static bool extent_partial_reads_expensive ( struct bkey_s_c k )
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c ( k ) ;
struct bch_extent_crc_unpacked crc ;
const union bch_extent_entry * i ;
bkey_for_each_crc ( k . k , ptrs , crc , i )
if ( crc . csum_type | | crc . compression_type )
return true ;
return false ;
}
2017-03-17 09:18:50 +03:00
static void readpage_bio_extend ( struct readpages_iter * iter ,
2019-08-16 16:59:56 +03:00
struct bio * bio ,
unsigned sectors_this_extent ,
2017-03-17 09:18:50 +03:00
bool get_more )
{
2019-08-16 16:59:56 +03:00
while ( bio_sectors ( bio ) < sectors_this_extent & &
2017-03-17 09:18:50 +03:00
bio - > bi_vcnt < bio - > bi_max_vecs ) {
2021-11-24 04:00:34 +03:00
pgoff_t page_offset = bio_end_sector ( bio ) > > PAGE_SECTORS_SHIFT ;
2017-03-17 09:18:50 +03:00
struct page * page = readpage_iter_next ( iter ) ;
int ret ;
if ( page ) {
if ( iter - > offset + iter - > idx ! = page_offset )
break ;
iter - > idx + + ;
} else {
if ( ! get_more )
break ;
page = xa_load ( & iter - > mapping - > i_pages , page_offset ) ;
if ( page & & ! xa_is_value ( page ) )
break ;
page = __page_cache_alloc ( readahead_gfp_mask ( iter - > mapping ) ) ;
if ( ! page )
break ;
2019-07-03 04:41:35 +03:00
if ( ! __bch2_page_state_create ( page , 0 ) ) {
put_page ( page ) ;
break ;
}
2017-03-17 09:18:50 +03:00
ret = add_to_page_cache_lru ( page , iter - > mapping ,
page_offset , GFP_NOFS ) ;
if ( ret ) {
2019-07-03 04:41:35 +03:00
__bch2_page_state_release ( page ) ;
2017-03-17 09:18:50 +03:00
put_page ( page ) ;
break ;
}
put_page ( page ) ;
}
2019-07-04 02:27:42 +03:00
BUG_ON ( ! bio_add_page ( bio , page , PAGE_SIZE , 0 ) ) ;
2017-03-17 09:18:50 +03:00
}
}
2021-03-13 04:30:39 +03:00
/*
 * Issue the read described by @rbio for inode @inum.
 *
 * Extents are looked up one slot at a time starting at the bio's current
 * sector; each lookup produces one fragment that is handed to
 * bch2_read_extent(). When @readpages_iter is non-NULL the bio is first
 * extended via readpage_bio_extend() so a fragment can cover more of the
 * extent. A lookup that fails with -EINTR restarts the whole transaction;
 * any other lookup error completes the bio with BLK_STS_IOERR.
 */
static void bchfs_read(struct btree_trans *trans,
		       struct bch_read_bio *rbio,
		       subvol_inum inum,
		       struct readpages_iter *readpages_iter)
{
	struct bch_fs *c = trans->c;
	struct bio *bio = &rbio->bio;
	struct btree_iter iter;
	struct bkey_buf extent_buf;
	int flags = BCH_READ_RETRY_IF_STALE | BCH_READ_MAY_PROMOTE;
	u32 snapshot;
	int ret = 0;

	rbio->c		 = c;
	rbio->start_time = local_clock();
	rbio->subvol	 = inum.subvol;

	bch2_bkey_buf_init(&extent_buf);
retry:
	bch2_trans_begin(trans);
	/* Zeroed so the iter_exit at err: sees a known state if we bail early */
	iter = (struct btree_iter) { NULL };

	ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
	if (ret)
		goto err;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
			     SPOS(inum.inum, bio->bi_iter.bi_sector, snapshot),
			     BTREE_ITER_SLOTS | BTREE_ITER_FILTER_SNAPSHOTS);

	for (;;) {
		enum btree_id data_btree = BTREE_ID_extents;
		unsigned frag_bytes, nr_sectors, extent_offset;
		struct bkey_s_c k;

		/*
		 * read_extent -> io_time_reset may cause a transaction restart
		 * without returning an error, so check for that here:
		 */
		if (!bch2_trans_relock(trans)) {
			ret = -EINTR;
			break;
		}

		bch2_btree_iter_set_pos(&iter,
				POS(inum.inum, bio->bi_iter.bi_sector));

		k = bch2_btree_iter_peek_slot(&iter);
		ret = bkey_err(k);
		if (ret)
			break;

		extent_offset = iter.pos.offset - bkey_start_offset(k.k);
		nr_sectors = k.k->size - extent_offset;

		bch2_bkey_buf_reassemble(&extent_buf, c, k);

		ret = bch2_read_indirect_extent(trans, &data_btree,
						&extent_offset, &extent_buf);
		if (ret)
			break;

		/* From here on, work from our private copy of the key */
		k = bkey_i_to_s_c(extent_buf.k);

		nr_sectors = min(nr_sectors, k.k->size - extent_offset);

		bch2_trans_unlock(trans);

		if (readpages_iter)
			readpage_bio_extend(readpages_iter, bio, nr_sectors,
					    extent_partial_reads_expensive(k));

		/*
		 * Temporarily shrink the bio to this fragment; frag_bytes
		 * holds the previous size until we swap back below.
		 */
		frag_bytes = min(nr_sectors, bio_sectors(bio)) << 9;
		swap(bio->bi_iter.bi_size, frag_bytes);

		if (bio->bi_iter.bi_size == frag_bytes)
			flags |= BCH_READ_LAST_FRAGMENT;

		bch2_bio_page_state_set(bio, k);

		bch2_read_extent(trans, rbio, iter.pos,
				 data_btree, k, extent_offset, flags);

		if (flags & BCH_READ_LAST_FRAGMENT)
			break;

		swap(bio->bi_iter.bi_size, frag_bytes);
		bio_advance(bio, frag_bytes);

		ret = btree_trans_too_many_iters(trans);
		if (ret)
			break;
	}
err:
	bch2_trans_iter_exit(trans, &iter);

	if (ret == -EINTR)
		goto retry;

	if (ret) {
		bch_err_inum_ratelimited(c, inum.inum,
				"read error %i from btree lookup", ret);
		rbio->bio.bi_status = BLK_STS_IOERR;
		bio_endio(&rbio->bio);
	}

	bch2_bkey_buf_exit(&extent_buf, c);
}
void bch2_readahead ( struct readahead_control * ractl )
{
struct bch_inode_info * inode = to_bch_ei ( ractl - > mapping - > host ) ;
struct bch_fs * c = inode - > v . i_sb - > s_fs_info ;
2019-10-09 19:50:39 +03:00
struct bch_io_opts opts = io_opts ( c , & inode - > ei_inode ) ;
2019-03-25 22:10:15 +03:00
struct btree_trans trans ;
2017-03-17 09:18:50 +03:00
struct page * page ;
struct readpages_iter readpages_iter ;
int ret ;
ret = readpages_iter_init ( & readpages_iter , ractl ) ;
BUG_ON ( ret ) ;
2019-05-15 17:54:43 +03:00
bch2_trans_init ( & trans , c , 0 , 0 ) ;
2017-03-17 09:18:50 +03:00
bch2_pagecache_add_get ( & inode - > ei_pagecache_lock ) ;
while ( ( page = readpage_iter_next ( & readpages_iter ) ) ) {
pgoff_t index = readpages_iter . offset + readpages_iter . idx ;
unsigned n = min_t ( unsigned ,
readpages_iter . nr_pages -
readpages_iter . idx ,
BIO_MAX_VECS ) ;
struct bch_read_bio * rbio =
rbio_init ( bio_alloc_bioset ( NULL , n , REQ_OP_READ ,
GFP_NOFS , & c - > bio_read ) ,
opts ) ;
readpages_iter . idx + + ;
2021-11-24 04:00:34 +03:00
rbio - > bio . bi_iter . bi_sector = ( sector_t ) index < < PAGE_SECTORS_SHIFT ;
2017-03-17 09:18:50 +03:00
rbio - > bio . bi_end_io = bch2_readpages_end_io ;
2019-07-04 02:27:42 +03:00
BUG_ON ( ! bio_add_page ( & rbio - > bio , page , PAGE_SIZE , 0 ) ) ;
2017-03-17 09:18:50 +03:00
2021-03-13 04:30:39 +03:00
bchfs_read ( & trans , rbio , inode_inum ( inode ) ,
2019-03-28 05:03:30 +03:00
& readpages_iter ) ;
2017-03-17 09:18:50 +03:00
}
bch2_pagecache_add_put ( & inode - > ei_pagecache_lock ) ;
2019-03-25 22:10:15 +03:00
bch2_trans_exit ( & trans ) ;
2017-03-17 09:18:50 +03:00
kfree ( readpages_iter . pages ) ;
}
static void __bchfs_readpage ( struct bch_fs * c , struct bch_read_bio * rbio ,
2021-03-13 04:30:39 +03:00
subvol_inum inum , struct page * page )
2017-03-17 09:18:50 +03:00
{
2019-03-25 22:10:15 +03:00
struct btree_trans trans ;
2017-03-17 09:18:50 +03:00
2019-07-03 04:41:35 +03:00
bch2_page_state_create ( page , __GFP_NOFAIL ) ;
2017-03-17 09:18:50 +03:00
rbio - > bio . bi_opf = REQ_OP_READ | REQ_SYNC ;
2019-07-29 19:24:36 +03:00
rbio - > bio . bi_iter . bi_sector =
2021-11-24 04:00:34 +03:00
( sector_t ) page - > index < < PAGE_SECTORS_SHIFT ;
2019-07-29 19:24:36 +03:00
BUG_ON ( ! bio_add_page ( & rbio - > bio , page , PAGE_SIZE , 0 ) ) ;
2017-03-17 09:18:50 +03:00
2019-05-15 17:54:43 +03:00
bch2_trans_init ( & trans , c , 0 , 0 ) ;
2021-03-13 04:30:39 +03:00
bchfs_read ( & trans , rbio , inum , NULL ) ;
2019-03-25 22:10:15 +03:00
bch2_trans_exit ( & trans ) ;
2017-03-17 09:18:50 +03:00
}
static void bch2_read_single_page_end_io ( struct bio * bio )
{
complete ( bio - > bi_private ) ;
}
static int bch2_read_single_page ( struct page * page ,
struct address_space * mapping )
{
struct bch_inode_info * inode = to_bch_ei ( mapping - > host ) ;
struct bch_fs * c = inode - > v . i_sb - > s_fs_info ;
struct bch_read_bio * rbio ;
int ret ;
DECLARE_COMPLETION_ONSTACK ( done ) ;
rbio = rbio_init ( bio_alloc_bioset ( NULL , 1 , REQ_OP_READ , GFP_NOFS , & c - > bio_read ) ,
2019-10-09 19:50:39 +03:00
io_opts ( c , & inode - > ei_inode ) ) ;
2017-03-17 09:18:50 +03:00
rbio - > bio . bi_private = & done ;
rbio - > bio . bi_end_io = bch2_read_single_page_end_io ;
2021-03-13 04:30:39 +03:00
__bchfs_readpage ( c , rbio , inode_inum ( inode ) , page ) ;
2017-03-17 09:18:50 +03:00
wait_for_completion ( & done ) ;
ret = blk_status_to_errno ( rbio - > bio . bi_status ) ;
bio_put ( & rbio - > bio ) ;
if ( ret < 0 )
return ret ;
SetPageUptodate ( page ) ;
return 0 ;
}
int bch2_read_folio ( struct file * file , struct folio * folio )
{
struct page * page = & folio - > page ;
int ret ;
ret = bch2_read_single_page ( page , page - > mapping ) ;
folio_unlock ( folio ) ;
return ret ;
}
/* writepages: */
/* State threaded through the per-page writeback callback. */
struct bch_writepage_state {
/* Write currently being assembled; NULL when no write is pending. */
struct bch_writepage_io * io ;
/* IO options taken from the inode when the state is initialised. */
struct bch_io_opts opts ;
} ;
static inline struct bch_writepage_state bch_writepage_state_init ( struct bch_fs * c ,
struct bch_inode_info * inode )
{
2019-10-09 19:50:39 +03:00
return ( struct bch_writepage_state ) {
. opts = io_opts ( c , & inode - > ei_inode )
} ;
2017-03-17 09:18:50 +03:00
}
2022-10-29 09:47:33 +03:00
static void bch2_writepage_io_done ( struct bch_write_op * op )
2017-03-17 09:18:50 +03:00
{
2022-10-29 09:47:33 +03:00
struct bch_writepage_io * io =
container_of ( op , struct bch_writepage_io , op ) ;
2019-10-09 19:50:39 +03:00
struct bch_fs * c = io - > op . c ;
struct bio * bio = & io - > op . wbio . bio ;
2017-03-17 09:18:50 +03:00
struct bvec_iter_all iter ;
struct bio_vec * bvec ;
2019-08-13 10:16:52 +03:00
unsigned i ;
2017-03-17 09:18:50 +03:00
2021-05-19 06:53:43 +03:00
up ( & io - > op . c - > io_in_flight ) ;
2019-10-09 19:50:39 +03:00
if ( io - > op . error ) {
2020-12-03 22:27:20 +03:00
set_bit ( EI_INODE_ERROR , & io - > inode - > ei_flags ) ;
2019-04-18 03:34:24 +03:00
bio_for_each_segment_all ( bvec , bio , iter ) {
2019-08-13 10:16:52 +03:00
struct bch_page_state * s ;
2017-03-17 09:18:50 +03:00
SetPageError ( bvec - > bv_page ) ;
2019-04-18 03:34:24 +03:00
mapping_set_error ( bvec - > bv_page - > mapping , - EIO ) ;
2019-08-13 10:16:52 +03:00
2019-10-09 16:19:06 +03:00
s = __bch2_page_state ( bvec - > bv_page ) ;
spin_lock ( & s - > lock ) ;
2019-08-13 10:16:52 +03:00
for ( i = 0 ; i < PAGE_SECTORS ; i + + )
s - > s [ i ] . nr_replicas = 0 ;
2019-10-09 16:19:06 +03:00
spin_unlock ( & s - > lock ) ;
2019-04-18 03:34:24 +03:00
}
2017-03-17 09:18:50 +03:00
}
2019-11-10 00:43:16 +03:00
if ( io - > op . flags & BCH_WRITE_WROTE_DATA_INLINE ) {
bio_for_each_segment_all ( bvec , bio , iter ) {
struct bch_page_state * s ;
s = __bch2_page_state ( bvec - > bv_page ) ;
spin_lock ( & s - > lock ) ;
for ( i = 0 ; i < PAGE_SECTORS ; i + + )
s - > s [ i ] . nr_replicas = 0 ;
spin_unlock ( & s - > lock ) ;
}
}
2017-03-17 09:18:50 +03:00
/*
* racing with fallocate can cause us to add fewer sectors than
* expected - but we shouldn ' t add more sectors than expected :
*/
2019-10-09 19:50:39 +03:00
BUG_ON ( io - > op . i_sectors_delta > 0 ) ;
2017-03-17 09:18:50 +03:00
/*
* ( error ( due to going RO ) halfway through a page can screw that up
* slightly )
* XXX wtf ?
2019-10-09 19:50:39 +03:00
BUG_ON ( io - > op . op . i_sectors_delta > = PAGE_SECTORS ) ;
2017-03-17 09:18:50 +03:00
*/
/*
* PageWriteback is effectively our ref on the inode - fixup i_blocks
* before calling end_page_writeback :
*/
2019-10-09 19:50:39 +03:00
i_sectors_acct ( c , io - > inode , NULL , io - > op . i_sectors_delta ) ;
2017-03-17 09:18:50 +03:00
2019-07-29 19:24:36 +03:00
bio_for_each_segment_all ( bvec , bio , iter ) {
struct bch_page_state * s = __bch2_page_state ( bvec - > bv_page ) ;
if ( atomic_dec_and_test ( & s - > write_count ) )
end_page_writeback ( bvec - > bv_page ) ;
}
2017-03-17 09:18:50 +03:00
2022-10-29 09:47:33 +03:00
bio_put ( & io - > op . wbio . bio ) ;
2017-03-17 09:18:50 +03:00
}
static void bch2_writepage_do_io ( struct bch_writepage_state * w )
{
struct bch_writepage_io * io = w - > io ;
2021-05-19 06:53:43 +03:00
down ( & io - > op . c - > io_in_flight ) ;
2017-03-17 09:18:50 +03:00
w - > io = NULL ;
2022-10-29 09:47:33 +03:00
closure_call ( & io - > op . cl , bch2_write , NULL , NULL ) ;
2017-03-17 09:18:50 +03:00
}
/*
* Get a bch_writepage_io and add @ page to it - appending to an existing one if
* possible , else allocating a new one :
*/
static void bch2_writepage_io_alloc ( struct bch_fs * c ,
2019-11-14 03:45:48 +03:00
struct writeback_control * wbc ,
2017-03-17 09:18:50 +03:00
struct bch_writepage_state * w ,
struct bch_inode_info * inode ,
2019-07-29 19:24:36 +03:00
u64 sector ,
2017-03-17 09:18:50 +03:00
unsigned nr_replicas )
{
struct bch_write_op * op ;
w - > io = container_of ( bio_alloc_bioset ( NULL , BIO_MAX_VECS ,
REQ_OP_WRITE ,
GFP_NOFS ,
& c - > writepage_bioset ) ,
2019-10-09 19:50:39 +03:00
struct bch_writepage_io , op . wbio . bio ) ;
2017-03-17 09:18:50 +03:00
2019-10-09 19:50:39 +03:00
w - > io - > inode = inode ;
op = & w - > io - > op ;
bch2_write_op_init ( op , c , w - > opts ) ;
op - > target = w - > opts . foreground_target ;
2017-03-17 09:18:50 +03:00
op - > nr_replicas = nr_replicas ;
op - > res . nr_replicas = nr_replicas ;
op - > write_point = writepoint_hashed ( inode - > ei_last_dirtied ) ;
2021-03-13 04:30:39 +03:00
op - > subvol = inode - > ei_subvol ;
2019-07-29 19:24:36 +03:00
op - > pos = POS ( inode - > v . i_ino , sector ) ;
2022-10-29 09:47:33 +03:00
op - > end_io = bch2_writepage_io_done ;
2019-07-29 19:24:36 +03:00
op - > wbio . bio . bi_iter . bi_sector = sector ;
2019-11-14 03:45:48 +03:00
op - > wbio . bio . bi_opf = wbc_to_write_flags ( wbc ) ;
2017-03-17 09:18:50 +03:00
}
static int __bch2_writepage ( struct folio * folio ,
struct writeback_control * wbc ,
void * data )
{
struct page * page = & folio - > page ;
struct bch_inode_info * inode = to_bch_ei ( page - > mapping - > host ) ;
struct bch_fs * c = inode - > v . i_sb - > s_fs_info ;
struct bch_writepage_state * w = data ;
2019-07-29 19:24:36 +03:00
struct bch_page_state * s , orig ;
unsigned i , offset , nr_replicas_this_write = U32_MAX ;
2017-03-17 09:18:50 +03:00
loff_t i_size = i_size_read ( & inode - > v ) ;
pgoff_t end_index = i_size > > PAGE_SHIFT ;
2019-07-02 21:59:15 +03:00
int ret ;
2017-03-17 09:18:50 +03:00
EBUG_ON ( ! PageUptodate ( page ) ) ;
/* Is the page fully inside i_size? */
if ( page - > index < end_index )
goto do_io ;
/* Is the page fully outside i_size? (truncate in progress) */
offset = i_size & ( PAGE_SIZE - 1 ) ;
if ( page - > index > end_index | | ! offset ) {
unlock_page ( page ) ;
return 0 ;
}
/*
* The page straddles i_size . It must be zeroed out on each and every
* writepage invocation because it may be mmapped . " A file is mapped
* in multiples of the page size . For a file that is not a multiple of
* the page size , the remaining memory is zeroed when mapped , and
* writes to that region are not written out to the file . "
*/
zero_user_segment ( page , offset , PAGE_SIZE ) ;
do_io :
2019-07-03 04:41:35 +03:00
s = bch2_page_state_create ( page , __GFP_NOFAIL ) ;
2018-11-15 05:53:40 +03:00
2021-11-11 21:02:03 +03:00
/*
* Things get really hairy with errors during writeback :
*/
ret = bch2_get_page_disk_reservation ( c , inode , page , false ) ;
BUG_ON ( ret ) ;
2018-11-15 05:53:40 +03:00
2019-07-29 19:24:36 +03:00
/* Before unlocking the page, get copy of reservations: */
2021-11-11 21:02:03 +03:00
spin_lock ( & s - > lock ) ;
2019-07-29 19:24:36 +03:00
orig = * s ;
2021-11-11 21:02:03 +03:00
spin_unlock ( & s - > lock ) ;
2019-07-29 19:24:36 +03:00
for ( i = 0 ; i < PAGE_SECTORS ; i + + ) {
2019-07-30 21:18:29 +03:00
if ( s - > s [ i ] . state < SECTOR_DIRTY )
2019-07-29 19:24:36 +03:00
continue ;
2019-07-03 04:41:35 +03:00
nr_replicas_this_write =
min_t ( unsigned , nr_replicas_this_write ,
s - > s [ i ] . nr_replicas +
s - > s [ i ] . replicas_reserved ) ;
2019-07-29 19:24:36 +03:00
}
2019-07-02 21:59:15 +03:00
2019-07-03 04:41:35 +03:00
for ( i = 0 ; i < PAGE_SECTORS ; i + + ) {
2019-07-30 21:18:29 +03:00
if ( s - > s [ i ] . state < SECTOR_DIRTY )
2019-07-29 19:24:36 +03:00
continue ;
2019-07-03 04:41:35 +03:00
s - > s [ i ] . nr_replicas = w - > opts . compression
? 0 : nr_replicas_this_write ;
2019-07-02 21:59:15 +03:00
2019-07-03 04:41:35 +03:00
s - > s [ i ] . replicas_reserved = 0 ;
s - > s [ i ] . state = SECTOR_ALLOCATED ;
}
2017-03-17 09:18:50 +03:00
2019-07-29 19:24:36 +03:00
BUG_ON ( atomic_read ( & s - > write_count ) ) ;
atomic_set ( & s - > write_count , 1 ) ;
2017-03-17 09:18:50 +03:00
BUG_ON ( PageWriteback ( page ) ) ;
set_page_writeback ( page ) ;
2019-07-29 19:24:36 +03:00
2017-03-17 09:18:50 +03:00
unlock_page ( page ) ;
2019-07-29 19:24:36 +03:00
offset = 0 ;
while ( 1 ) {
2021-11-11 21:02:03 +03:00
unsigned sectors = 0 , dirty_sectors = 0 , reserved_sectors = 0 ;
2019-07-29 19:24:36 +03:00
u64 sector ;
while ( offset < PAGE_SECTORS & &
2019-07-30 21:18:29 +03:00
orig . s [ offset ] . state < SECTOR_DIRTY )
2019-07-29 19:24:36 +03:00
offset + + ;
2017-03-17 09:18:50 +03:00
2019-07-29 19:24:36 +03:00
if ( offset = = PAGE_SECTORS )
break ;
while ( offset + sectors < PAGE_SECTORS & &
2021-11-11 21:02:03 +03:00
orig . s [ offset + sectors ] . state > = SECTOR_DIRTY ) {
reserved_sectors + = orig . s [ offset + sectors ] . replicas_reserved ;
dirty_sectors + = orig . s [ offset + sectors ] . state = = SECTOR_DIRTY ;
2019-07-29 19:24:36 +03:00
sectors + + ;
}
2021-11-11 21:02:03 +03:00
BUG_ON ( ! sectors ) ;
2021-11-24 04:00:34 +03:00
sector = ( ( u64 ) page - > index < < PAGE_SECTORS_SHIFT ) + offset ;
2019-07-29 19:24:36 +03:00
if ( w - > io & &
2019-10-09 19:50:39 +03:00
( w - > io - > op . res . nr_replicas ! = nr_replicas_this_write | |
bio_full ( & w - > io - > op . wbio . bio , PAGE_SIZE ) | |
2020-04-29 22:28:25 +03:00
w - > io - > op . wbio . bio . bi_iter . bi_size + ( sectors < < 9 ) > =
( BIO_MAX_VECS * PAGE_SIZE ) | |
2019-10-09 19:50:39 +03:00
bio_end_sector ( & w - > io - > op . wbio . bio ) ! = sector ) )
2019-07-29 19:24:36 +03:00
bch2_writepage_do_io ( w ) ;
2017-03-17 09:18:50 +03:00
2019-07-29 19:24:36 +03:00
if ( ! w - > io )
2019-11-14 03:45:48 +03:00
bch2_writepage_io_alloc ( c , wbc , w , inode , sector ,
2019-07-29 19:24:36 +03:00
nr_replicas_this_write ) ;
2017-03-17 09:18:50 +03:00
2019-07-29 19:24:36 +03:00
atomic_inc ( & s - > write_count ) ;
2019-10-09 19:50:39 +03:00
BUG_ON ( inode ! = w - > io - > inode ) ;
BUG_ON ( ! bio_add_page ( & w - > io - > op . wbio . bio , page ,
2019-07-29 19:24:36 +03:00
sectors < < 9 , offset < < 9 ) ) ;
2019-09-20 01:05:04 +03:00
/* Check for writing past i_size: */
2019-10-09 19:50:39 +03:00
WARN_ON ( ( bio_end_sector ( & w - > io - > op . wbio . bio ) < < 9 ) >
2019-09-24 20:33:11 +03:00
round_up ( i_size , block_bytes ( c ) ) ) ;
2019-09-20 01:05:04 +03:00
2019-10-09 19:50:39 +03:00
w - > io - > op . res . sectors + = reserved_sectors ;
w - > io - > op . i_sectors_delta - = dirty_sectors ;
2019-07-29 19:24:36 +03:00
w - > io - > op . new_i_size = i_size ;
offset + = sectors ;
}
2017-03-17 09:18:50 +03:00
2019-07-29 19:24:36 +03:00
if ( atomic_dec_and_test ( & s - > write_count ) )
end_page_writeback ( page ) ;
2017-03-17 09:18:50 +03:00
return 0 ;
}
int bch2_writepages ( struct address_space * mapping , struct writeback_control * wbc )
{
struct bch_fs * c = mapping - > host - > i_sb - > s_fs_info ;
struct bch_writepage_state w =
bch_writepage_state_init ( c , to_bch_ei ( mapping - > host ) ) ;
struct blk_plug plug ;
int ret ;
blk_start_plug ( & plug ) ;
ret = write_cache_pages ( mapping , wbc , __bch2_writepage , & w ) ;
if ( w . io )
bch2_writepage_do_io ( & w ) ;
blk_finish_plug ( & plug ) ;
return ret ;
}
int bch2_writepage ( struct page * page , struct writeback_control * wbc )
{
struct bch_fs * c = page - > mapping - > host - > i_sb - > s_fs_info ;
struct bch_writepage_state w =
bch_writepage_state_init ( c , to_bch_ei ( page - > mapping - > host ) ) ;
int ret ;
ret = __bch2_writepage ( page_folio ( page ) , wbc , & w ) ;
if ( w . io )
bch2_writepage_do_io ( & w ) ;
return ret ;
}
/* buffered writes: */
/*
 * Prepare a page for a buffered write of @len bytes at @pos.
 *
 * On success returns 0 with the locked, referenced page in *@pagep, a
 * kmalloc'd page reservation in *@fsdata and the pagecache add lock held;
 * bch2_write_end() releases all three.
 *
 * On failure returns a negative errno with *@fsdata set to NULL, the
 * reservation freed and the pagecache add lock dropped; *@pagep is set to
 * NULL if a page had been grabbed.
 *
 * The page is read in first unless it is already uptodate, the write covers
 * the whole page, or the part not being written lies at or beyond i_size (in
 * which case it is zeroed instead).
 */
int bch2_write_begin(struct file *file, struct address_space *mapping,
		     loff_t pos, unsigned len,
		     struct page **pagep, void **fsdata)
{
	struct bch_inode_info *inode = to_bch_ei(mapping->host);
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	struct bch2_page_reservation *res;
	pgoff_t index = pos >> PAGE_SHIFT;
	unsigned offset = pos & (PAGE_SIZE - 1);
	struct page *page;
	int ret = -ENOMEM;

	res = kmalloc(sizeof(*res), GFP_KERNEL);
	if (!res)
		return -ENOMEM;

	bch2_page_reservation_init(c, inode, res);
	*fsdata = res;

	bch2_pagecache_add_get(&inode->ei_pagecache_lock);

	page = grab_cache_page_write_begin(mapping, index);
	if (!page)
		goto err_unlock;

	if (PageUptodate(page))
		goto out;

	/* If we're writing entire page, don't need to read it in first: */
	if (len == PAGE_SIZE)
		goto out;

	/* Write starts at the page boundary and reaches i_size: zero the tail */
	if (!offset && pos + len >= inode->v.i_size) {
		zero_user_segment(page, len, PAGE_SIZE);
		flush_dcache_page(page);
		goto out;
	}

	/* Page lies past the page containing i_size: zero around the write */
	if (index > inode->v.i_size >> PAGE_SHIFT) {
		zero_user_segments(page, 0, offset, offset + len, PAGE_SIZE);
		flush_dcache_page(page);
		goto out;
	}
readpage:
	ret = bch2_read_single_page(page, mapping);
	if (ret)
		goto err;
out:
	if (!bch2_page_state_create(page, __GFP_NOFAIL)->uptodate) {
		ret = bch2_page_state_set(c, inode_inum(inode), &page, 1);
		/*
		 * Must bail out via err: jumping back to out: would retry
		 * this same call forever on a persistent failure, with the
		 * page locked and the error never reported.
		 */
		if (ret)
			goto err;
	}

	ret = bch2_page_reservation_get(c, inode, page, res,
					offset, len, true);
	if (ret) {
		if (!PageUptodate(page)) {
			/*
			 * If the page hasn't been read in, we won't know if we
			 * actually need a reservation - we don't actually need
			 * to read here, we just need to check if the page is
			 * fully backed by uncompressed data:
			 */
			goto readpage;
		}

		goto err;
	}

	*pagep = page;
	return 0;
err:
	unlock_page(page);
	put_page(page);
	*pagep = NULL;
err_unlock:
	bch2_pagecache_add_put(&inode->ei_pagecache_lock);
	kfree(res);
	*fsdata = NULL;
	return ret;
}
/*
 * Finish a buffered write started by bch2_write_begin(): extend i_size if the
 * write went past it, mark the written range dirty, then release the page,
 * the pagecache add lock and the reservation passed in @fsdata.
 *
 * Returns the number of bytes accepted, which is 0 when a short copy hit a
 * page that was not uptodate (the caller then has to redo the write).
 */
int bch2_write_end(struct file *file, struct address_space *mapping,
		   loff_t pos, unsigned len, unsigned copied,
		   struct page *page, void *fsdata)
{
	struct bch_inode_info *inode = to_bch_ei(mapping->host);
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	struct bch2_page_reservation *res = fsdata;
	unsigned pg_offset = pos & (PAGE_SIZE - 1);

	lockdep_assert_held(&inode->v.i_rwsem);

	if (unlikely(copied < len && !PageUptodate(page))) {
		/*
		 * The page needs to be read in, but that would destroy
		 * our partial write - simplest thing is to just force
		 * userspace to redo the write:
		 */
		zero_user(page, 0, PAGE_SIZE);
		flush_dcache_page(page);
		copied = 0;
	}

	spin_lock(&inode->v.i_lock);
	if (pos + copied > inode->v.i_size)
		i_size_write(&inode->v, pos + copied);
	spin_unlock(&inode->v.i_lock);

	if (copied) {
		if (!PageUptodate(page))
			SetPageUptodate(page);

		bch2_set_page_dirty(c, inode, page, res, pg_offset, copied);

		inode->ei_last_dirtied = (unsigned long) current;
	}

	unlock_page(page);
	put_page(page);
	bch2_pagecache_add_put(&inode->ei_pagecache_lock);

	bch2_page_reservation_put(c, inode, res);
	kfree(res);

	return copied;
}
#define WRITE_BATCH_PAGES	32

/*
 * Copy up to @len bytes from @iter into the page cache at @pos, handling at
 * most WRITE_BATCH_PAGES pages in one call.
 *
 * Steps: grab the pages, read or zero partially-written edge pages that are
 * not uptodate, take reservations, copy from userspace, update i_size, and
 * mark the copied range dirty.
 *
 * Returns the number of bytes copied if any were, otherwise the error code
 * (which may be 0).
 */
static int __bch2_buffered_write(struct bch_inode_info *inode,
				 struct address_space *mapping,
				 struct iov_iter *iter,
				 loff_t pos, unsigned len)
{
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	struct page *pages[WRITE_BATCH_PAGES];
	struct bch2_page_reservation res;
	unsigned long index = pos >> PAGE_SHIFT;
	unsigned offset = pos & (PAGE_SIZE - 1);
	unsigned nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
	unsigned i, reserved = 0, set_dirty = 0;
	unsigned copied = 0, nr_pages_copied = 0;
	int ret = 0;

	BUG_ON(!len);
	BUG_ON(nr_pages > ARRAY_SIZE(pages));

	bch2_page_reservation_init(c, inode, &res);

	for (i = 0; i < nr_pages; i++) {
		pages[i] = grab_cache_page_write_begin(mapping, index + i);
		if (pages[i])
			continue;

		/* Couldn't get this page: shrink the write to what we have */
		nr_pages = i;
		if (!i) {
			ret = -ENOMEM;
			goto out;
		}

		len = min_t(unsigned, len,
			    nr_pages * PAGE_SIZE - offset);
		break;
	}

	/* Write starts mid-page: the first page must hold valid data */
	if (offset && !PageUptodate(pages[0])) {
		ret = bch2_read_single_page(pages[0], mapping);
		if (ret)
			goto out;
	}

	/* Write ends mid-page: same for the last page */
	if ((pos + len) & (PAGE_SIZE - 1)) {
		struct page *last = pages[nr_pages - 1];

		if (!PageUptodate(last)) {
			if ((index + nr_pages - 1) << PAGE_SHIFT >= inode->v.i_size) {
				zero_user(last, 0, PAGE_SIZE);
			} else {
				ret = bch2_read_single_page(last, mapping);
				if (ret)
					goto out;
			}
		}
	}

	while (reserved < len) {
		unsigned pg_idx = (offset + reserved) >> PAGE_SHIFT;
		struct page *page = pages[pg_idx];
		unsigned pg_offset = (offset + reserved) & (PAGE_SIZE - 1);
		unsigned pg_len = min_t(unsigned, len - reserved,
					PAGE_SIZE - pg_offset);

		if (!bch2_page_state_create(page, __GFP_NOFAIL)->uptodate) {
			ret = bch2_page_state_set(c, inode_inum(inode),
						  pages + pg_idx,
						  nr_pages - pg_idx);
			if (ret)
				goto out;
		}

		ret = bch2_page_reservation_get(c, inode, page, &res,
						pg_offset, pg_len, true);
		if (ret)
			goto out;

		reserved += pg_len;
	}

	if (mapping_writably_mapped(mapping))
		for (i = 0; i < nr_pages; i++)
			flush_dcache_page(pages[i]);

	while (copied < len) {
		struct page *page = pages[(offset + copied) >> PAGE_SHIFT];
		unsigned pg_offset = (offset + copied) & (PAGE_SIZE - 1);
		unsigned pg_len = min_t(unsigned, len - copied,
					PAGE_SIZE - pg_offset);
		unsigned pg_copied = copy_page_from_iter_atomic(page,
						pg_offset, pg_len, iter);

		if (!pg_copied)
			break;

		/*
		 * Partial copy into a non-uptodate page that lies inside
		 * i_size: wipe the page and stop without counting these bytes.
		 */
		if (!PageUptodate(page) &&
		    pg_copied != PAGE_SIZE &&
		    pos + copied + pg_copied < inode->v.i_size) {
			zero_user(page, 0, PAGE_SIZE);
			break;
		}

		flush_dcache_page(page);
		copied += pg_copied;

		/* Short copy: stop after accounting for what we got */
		if (pg_copied != pg_len)
			break;
	}

	if (!copied)
		goto out;

	spin_lock(&inode->v.i_lock);
	if (pos + copied > inode->v.i_size)
		i_size_write(&inode->v, pos + copied);
	spin_unlock(&inode->v.i_lock);

	while (set_dirty < copied) {
		struct page *page = pages[(offset + set_dirty) >> PAGE_SHIFT];
		unsigned pg_offset = (offset + set_dirty) & (PAGE_SIZE - 1);
		unsigned pg_len = min_t(unsigned, copied - set_dirty,
					PAGE_SIZE - pg_offset);

		if (!PageUptodate(page))
			SetPageUptodate(page);

		bch2_set_page_dirty(c, inode, page, &res, pg_offset, pg_len);
		unlock_page(page);
		put_page(page);

		set_dirty += pg_len;
	}

	/* Pages up to here were already unlocked and released above */
	nr_pages_copied = DIV_ROUND_UP(offset + copied, PAGE_SIZE);
	inode->ei_last_dirtied = (unsigned long) current;
out:
	for (i = nr_pages_copied; i < nr_pages; i++) {
		unlock_page(pages[i]);
		put_page(pages[i]);
	}

	bch2_page_reservation_put(c, inode, &res);

	return copied ?: ret;
}
static ssize_t bch2_buffered_write ( struct kiocb * iocb , struct iov_iter * iter )
{
struct file * file = iocb - > ki_filp ;
struct address_space * mapping = file - > f_mapping ;
struct bch_inode_info * inode = file_bch_inode ( file ) ;
loff_t pos = iocb - > ki_pos ;
ssize_t written = 0 ;
int ret = 0 ;
bch2_pagecache_add_get ( & inode - > ei_pagecache_lock ) ;
do {
unsigned offset = pos & ( PAGE_SIZE - 1 ) ;
unsigned bytes = min_t ( unsigned long , iov_iter_count ( iter ) ,
PAGE_SIZE * WRITE_BATCH_PAGES - offset ) ;
again :
/*
* Bring in the user page that we will copy from _first_ .
* Otherwise there ' s a nasty deadlock on copying from the
* same page as we ' re writing to , without it being marked
* up - to - date .
*
* Not only is this an optimisation , but it is also required
* to check that the address is actually valid , when atomic
* usercopies are used , below .
*/
if ( unlikely ( fault_in_iov_iter_readable ( iter , bytes ) ) ) {
bytes = min_t ( unsigned long , iov_iter_count ( iter ) ,
PAGE_SIZE - offset ) ;
if ( unlikely ( fault_in_iov_iter_readable ( iter , bytes ) ) ) {
ret = - EFAULT ;
break ;
}
}
if ( unlikely ( fatal_signal_pending ( current ) ) ) {
ret = - EINTR ;
break ;
}
ret = __bch2_buffered_write ( inode , mapping , iter , pos , bytes ) ;
if ( unlikely ( ret < 0 ) )
break ;
cond_resched ( ) ;
if ( unlikely ( ret = = 0 ) ) {
/*
* If we were unable to copy any data at all , we must
* fall back to a single segment length write .
*
* If we didn ' t fallback here , we could livelock
* because not all segments in the iov can be copied at
* once without a pagefault .
*/
bytes = min_t ( unsigned long , PAGE_SIZE - offset ,
iov_iter_single_seg_count ( iter ) ) ;
goto again ;
}
pos + = ret ;
written + = ret ;
2020-07-09 20:54:58 +03:00
ret = 0 ;
2017-03-17 09:18:50 +03:00
balance_dirty_pages_ratelimited ( mapping ) ;
} while ( iov_iter_count ( iter ) ) ;
bch2_pagecache_add_put ( & inode - > ei_pagecache_lock ) ;
return written ? written : ret ;
}
/* O_DIRECT reads */
2021-01-21 22:42:23 +03:00
/*
 * Dispose of a completed DIO read bio: hand it to bio_check_pages_dirty()
 * when @check_dirty is set, otherwise release its pages and drop the bio ref.
 */
static void bio_check_or_release(struct bio *bio, bool check_dirty)
{
	if (!check_dirty) {
		bio_release_pages(bio, false);
		bio_put(bio);
		return;
	}

	bio_check_pages_dirty(bio);
}
2017-03-17 09:18:50 +03:00
static void bch2_dio_read_complete ( struct closure * cl )
{
struct dio_read * dio = container_of ( cl , struct dio_read , cl ) ;
dio - > req - > ki_complete ( dio - > req , dio - > ret ) ;
2021-01-21 22:42:23 +03:00
bio_check_or_release ( & dio - > rbio . bio , dio - > should_dirty ) ;
2017-03-17 09:18:50 +03:00
}
static void bch2_direct_IO_read_endio ( struct bio * bio )
{
struct dio_read * dio = bio - > bi_private ;
if ( bio - > bi_status )
dio - > ret = blk_status_to_errno ( bio - > bi_status ) ;
closure_put ( & dio - > cl ) ;
}
static void bch2_direct_IO_read_split_endio ( struct bio * bio )
{
2021-01-21 22:42:23 +03:00
struct dio_read * dio = bio - > bi_private ;
bool should_dirty = dio - > should_dirty ;
2017-03-17 09:18:50 +03:00
bch2_direct_IO_read_endio ( bio ) ;
2021-01-21 22:42:23 +03:00
bio_check_or_release ( bio , should_dirty ) ;
2017-03-17 09:18:50 +03:00
}
static int bch2_direct_IO_read ( struct kiocb * req , struct iov_iter * iter )
{
struct file * file = req - > ki_filp ;
struct bch_inode_info * inode = file_bch_inode ( file ) ;
struct bch_fs * c = inode - > v . i_sb - > s_fs_info ;
2019-10-09 19:50:39 +03:00
struct bch_io_opts opts = io_opts ( c , & inode - > ei_inode ) ;
2017-03-17 09:18:50 +03:00
struct dio_read * dio ;
struct bio * bio ;
loff_t offset = req - > ki_pos ;
bool sync = is_sync_kiocb ( req ) ;
size_t shorten ;
ssize_t ret ;
if ( ( offset | iter - > count ) & ( block_bytes ( c ) - 1 ) )
return - EINVAL ;
ret = min_t ( loff_t , iter - > count ,
max_t ( loff_t , 0 , i_size_read ( & inode - > v ) - offset ) ) ;
if ( ! ret )
return ret ;
shorten = iov_iter_count ( iter ) - round_up ( ret , block_bytes ( c ) ) ;
iter - > count - = shorten ;
bio = bio_alloc_bioset ( NULL ,
iov_iter_npages ( iter , BIO_MAX_VECS ) ,
REQ_OP_READ ,
GFP_KERNEL ,
& c - > dio_read_bioset ) ;
bio - > bi_end_io = bch2_direct_IO_read_endio ;
dio = container_of ( bio , struct dio_read , rbio . bio ) ;
closure_init ( & dio - > cl , NULL ) ;
/*
* this is a _really_ horrible hack just to avoid an atomic sub at the
* end :
*/
if ( ! sync ) {
set_closure_fn ( & dio - > cl , bch2_dio_read_complete , NULL ) ;
atomic_set ( & dio - > cl . remaining ,
CLOSURE_REMAINING_INITIALIZER -
CLOSURE_RUNNING +
CLOSURE_DESTRUCTOR ) ;
} else {
atomic_set ( & dio - > cl . remaining ,
CLOSURE_REMAINING_INITIALIZER + 1 ) ;
}
dio - > req = req ;
dio - > ret = ret ;
2021-01-21 22:42:23 +03:00
/*
* This is one of the sketchier things I ' ve encountered : we have to skip
* the dirtying of requests that are internal from the kernel ( i . e . from
* loopback ) , because we ' ll deadlock on page_lock .
*/
dio - > should_dirty = iter_is_iovec ( iter ) ;
2017-03-17 09:18:50 +03:00
goto start ;
while ( iter - > count ) {
bio = bio_alloc_bioset ( NULL ,
iov_iter_npages ( iter , BIO_MAX_VECS ) ,
REQ_OP_READ ,
GFP_KERNEL ,
& c - > bio_read ) ;
bio - > bi_end_io = bch2_direct_IO_read_split_endio ;
start :
bio - > bi_opf = REQ_OP_READ | REQ_SYNC ;
bio - > bi_iter . bi_sector = offset > > 9 ;
bio - > bi_private = dio ;
ret = bio_iov_iter_get_pages ( bio , iter ) ;
if ( ret < 0 ) {
/* XXX: fault inject this path */
bio - > bi_status = BLK_STS_RESOURCE ;
bio_endio ( bio ) ;
break ;
}
offset + = bio - > bi_iter . bi_size ;
2021-01-21 22:42:23 +03:00
if ( dio - > should_dirty )
bio_set_pages_dirty ( bio ) ;
2017-03-17 09:18:50 +03:00
if ( iter - > count )
closure_get ( & dio - > cl ) ;
2021-03-13 04:30:39 +03:00
bch2_read ( c , rbio_init ( bio , opts ) , inode_inum ( inode ) ) ;
2017-03-17 09:18:50 +03:00
}
iter - > count + = shorten ;
if ( sync ) {
closure_sync ( & dio - > cl ) ;
closure_debug_destroy ( & dio - > cl ) ;
ret = dio - > ret ;
2021-01-21 22:42:23 +03:00
bio_check_or_release ( & dio - > rbio . bio , dio - > should_dirty ) ;
2017-03-17 09:18:50 +03:00
return ret ;
} else {
return - EIOCBQUEUED ;
}
}
ssize_t bch2_read_iter ( struct kiocb * iocb , struct iov_iter * iter )
{
struct file * file = iocb - > ki_filp ;
struct bch_inode_info * inode = file_bch_inode ( file ) ;
struct address_space * mapping = file - > f_mapping ;
size_t count = iov_iter_count ( iter ) ;
ssize_t ret ;
if ( ! count )
return 0 ; /* skip atime */
if ( iocb - > ki_flags & IOCB_DIRECT ) {
struct blk_plug plug ;
2022-11-02 23:45:28 +03:00
if ( unlikely ( mapping - > nrpages ) ) {
ret = filemap_write_and_wait_range ( mapping ,
iocb - > ki_pos ,
iocb - > ki_pos + count - 1 ) ;
if ( ret < 0 )
return ret ;
}
2017-03-17 09:18:50 +03:00
file_accessed ( file ) ;
blk_start_plug ( & plug ) ;
ret = bch2_direct_IO_read ( iocb , iter ) ;
blk_finish_plug ( & plug ) ;
if ( ret > = 0 )
iocb - > ki_pos + = ret ;
} else {
bch2_pagecache_add_get ( & inode - > ei_pagecache_lock ) ;
ret = generic_file_read_iter ( iocb , iter ) ;
bch2_pagecache_add_put ( & inode - > ei_pagecache_lock ) ;
}
return ret ;
}
/* O_DIRECT writes */
2021-03-16 07:28:17 +03:00
/*
 * Walk the extents btree over sectors [offset, offset + size) of @inum (using
 * slot iteration, so holes are visited too) and report whether every slot can
 * be overwritten without a new ENOSPC check.
 *
 * A slot disqualifies the range when its key is in a different snapshot, has
 * fewer replicas than @nr_replicas, or is compressed while the new write is
 * not (@compressed == false).
 *
 * Returns true only if the whole range qualified and no error occurred;
 * transaction restarts (-EINTR) are retried.
 */
static bool bch2_check_range_allocated(struct bch_fs *c, subvol_inum inum,
				       u64 offset, u64 size,
				       unsigned nr_replicas, bool compressed)
{
	struct btree_trans trans;
	struct btree_iter iter;
	struct bkey_s_c k;
	u64 range_end = offset + size;
	u32 snapshot;
	bool allocated = true;
	int rc;

	bch2_trans_init(&trans, c, 0, 0);

	do {
		bch2_trans_begin(&trans);

		rc = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot);
		if (rc)
			continue;	/* re-evaluates the -EINTR check below */

		for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents,
					     SPOS(inum.inum, offset, snapshot),
					     BTREE_ITER_SLOTS, k, rc) {
			bool usable;

			/* Walked past the end of the range: done */
			if (bkey_cmp(bkey_start_pos(k.k),
				     POS(inum.inum, range_end)) >= 0)
				break;

			usable = k.k->p.snapshot == snapshot &&
				 nr_replicas <= bch2_bkey_replicas(c, k) &&
				 (compressed || !bch2_bkey_sectors_compressed(k));
			if (!usable) {
				allocated = false;
				break;
			}
		}

		/* On restart, resume from where the iterator got to */
		offset = iter.pos.offset;
		bch2_trans_iter_exit(&trans, &iter);
	} while (rc == -EINTR);

	bch2_trans_exit(&trans);

	return !rc && allocated;
}
2017-03-17 09:18:50 +03:00
/*
* We ' re going to return - EIOCBQUEUED , but we haven ' t finished consuming the
* iov_iter yet , so we need to stash a copy of the iovec : it might be on the
* caller ' s stack , we ' re not guaranteed that it will live for the duration of
* the IO :
*/
static noinline int bch2_dio_write_copy_iov ( struct dio_write * dio )
{
struct iovec * iov = dio - > inline_vecs ;
/*
* iov_iter has a single embedded iovec - nothing to do :
*/
if ( iter_is_ubuf ( & dio - > iter ) )
return 0 ;
/*
* We don ' t currently handle non - iovec iov_iters here - return an error ,
* and we ' ll fall back to doing the IO synchronously :
*/
if ( ! iter_is_iovec ( & dio - > iter ) )
return - 1 ;
if ( dio - > iter . nr_segs > ARRAY_SIZE ( dio - > inline_vecs ) ) {
iov = kmalloc_array ( dio - > iter . nr_segs , sizeof ( * iov ) ,
GFP_KERNEL ) ;
if ( unlikely ( ! iov ) )
return - ENOMEM ;
dio - > free_iov = true ;
}
memcpy ( iov , dio - > iter . __iov , dio - > iter . nr_segs * sizeof ( * iov ) ) ;
dio - > iter . __iov = iov ;
return 0 ;
}
2020-06-30 01:22:06 +03:00
static void bch2_dio_write_loop_async ( struct bch_write_op * ) ;
2017-03-17 09:18:50 +03:00
static long bch2_dio_write_loop ( struct dio_write * dio )
{
2019-01-14 05:36:14 +03:00
bool kthread = ( current - > flags & PF_KTHREAD ) ! = 0 ;
2017-03-17 09:18:50 +03:00
struct kiocb * req = dio - > req ;
struct address_space * mapping = req - > ki_filp - > f_mapping ;
2019-10-09 19:50:39 +03:00
struct bch_inode_info * inode = file_bch_inode ( req - > ki_filp ) ;
2020-06-30 01:22:06 +03:00
struct bch_fs * c = inode - > v . i_sb - > s_fs_info ;
2019-10-09 19:50:39 +03:00
struct bio * bio = & dio - > op . wbio . bio ;
2020-11-11 20:33:12 +03:00
unsigned unaligned , iter_count ;
bool sync = dio - > sync , dropped_locks ;
2017-03-17 09:18:50 +03:00
long ret ;
if ( dio - > loop )
goto loop ;
while ( 1 ) {
2020-11-11 20:33:12 +03:00
iter_count = dio - > iter . count ;
2019-01-14 05:36:14 +03:00
if ( kthread )
kthread_use_mm ( dio - > mm ) ;
2017-03-17 09:18:50 +03:00
BUG_ON ( current - > faults_disabled_mapping ) ;
current - > faults_disabled_mapping = mapping ;
ret = bio_iov_iter_get_pages ( bio , & dio - > iter ) ;
2020-11-11 20:33:12 +03:00
dropped_locks = fdm_dropped_locks ( ) ;
2017-03-17 09:18:50 +03:00
current - > faults_disabled_mapping = NULL ;
2019-01-14 05:36:14 +03:00
if ( kthread )
kthread_unuse_mm ( dio - > mm ) ;
2017-03-17 09:18:50 +03:00
2020-11-11 20:33:12 +03:00
/*
* If the fault handler returned an error but also signalled
* that it dropped & retook ei_pagecache_lock , we just need to
* re - shoot down the page cache and retry :
*/
if ( dropped_locks & & ret )
ret = 0 ;
2017-03-17 09:18:50 +03:00
if ( unlikely ( ret < 0 ) )
goto err ;
2020-11-11 20:33:12 +03:00
if ( unlikely ( dropped_locks ) ) {
ret = write_invalidate_inode_pages_range ( mapping ,
req - > ki_pos ,
req - > ki_pos + iter_count - 1 ) ;
if ( unlikely ( ret ) )
goto err ;
if ( ! bio - > bi_iter . bi_size )
continue ;
}
2019-09-22 22:02:05 +03:00
unaligned = bio - > bi_iter . bi_size & ( block_bytes ( c ) - 1 ) ;
bio - > bi_iter . bi_size - = unaligned ;
iov_iter_revert ( & dio - > iter , unaligned ) ;
if ( ! bio - > bi_iter . bi_size ) {
/*
* bio_iov_iter_get_pages was only able to get <
* blocksize worth of pages :
*/
ret = - EFAULT ;
goto err ;
}
2020-06-30 01:22:06 +03:00
bch2_write_op_init ( & dio - > op , c , io_opts ( c , & inode - > ei_inode ) ) ;
dio - > op . end_io = bch2_dio_write_loop_async ;
dio - > op . target = dio - > op . opts . foreground_target ;
dio - > op . write_point = writepoint_hashed ( ( unsigned long ) current ) ;
dio - > op . nr_replicas = dio - > op . opts . data_replicas ;
2021-03-13 04:30:39 +03:00
dio - > op . subvol = inode - > ei_subvol ;
2020-06-30 01:22:06 +03:00
dio - > op . pos = POS ( inode - > v . i_ino , ( u64 ) req - > ki_pos > > 9 ) ;
if ( ( req - > ki_flags & IOCB_DSYNC ) & &
! c - > opts . journal_flush_disabled )
dio - > op . flags | = BCH_WRITE_FLUSH ;
2021-05-20 22:49:23 +03:00
dio - > op . flags | = BCH_WRITE_CHECK_ENOSPC ;
2020-06-30 01:22:06 +03:00
ret = bch2_disk_reservation_get ( c , & dio - > op . res , bio_sectors ( bio ) ,
dio - > op . opts . data_replicas , 0 ) ;
if ( unlikely ( ret ) & &
2021-03-16 07:28:17 +03:00
! bch2_check_range_allocated ( c , inode_inum ( inode ) ,
dio - > op . pos . offset , bio_sectors ( bio ) ,
bcachefs: Change when we allow overwrites
Originally, we'd check for -ENOSPC when getting a disk reservation
whenever the new extent took up more space on disk than the old extent.
Erasure coding screwed this up, because with erasure coding writes are
initially replicated, and then in the background the extra replicas are
dropped when the stripe is created. This means that with erasure coding
enabled, writes will always take up more space on disk than the data
they're overwriting - but, according to posix, overwrites aren't
supposed to return ENOSPC.
So, in this patch we fudge things: if the new extent has more replicas
than the _effective_ replicas of the old extent, or if the old extent is
compressed and the new one isn't, we check for ENOSPC when getting the
disk reservation - otherwise, we don't.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
2020-12-15 05:59:33 +03:00
dio - > op . opts . data_replicas ,
dio - > op . opts . compression ! = 0 ) )
2020-06-30 01:22:06 +03:00
goto err ;
2017-03-17 09:18:50 +03:00
task_io_account_write ( bio - > bi_iter . bi_size ) ;
if ( ! dio - > sync & & ! dio - > loop & & dio - > iter . count ) {
if ( bch2_dio_write_copy_iov ( dio ) ) {
2020-03-16 21:49:52 +03:00
dio - > sync = sync = true ;
2019-11-02 04:16:51 +03:00
goto do_io ;
2017-03-17 09:18:50 +03:00
}
}
2019-11-02 04:16:51 +03:00
do_io :
2017-03-17 09:18:50 +03:00
dio - > loop = true ;
2019-11-02 04:16:51 +03:00
closure_call ( & dio - > op . cl , bch2_write , NULL , NULL ) ;
2017-03-17 09:18:50 +03:00
2020-03-16 21:49:52 +03:00
if ( sync )
2019-11-02 04:16:51 +03:00
wait_for_completion ( & dio - > done ) ;
else
2017-03-17 09:18:50 +03:00
return - EIOCBQUEUED ;
loop :
2019-10-09 19:50:39 +03:00
i_sectors_acct ( c , inode , & dio - > quota_res ,
dio - > op . i_sectors_delta ) ;
2020-06-30 01:22:06 +03:00
req - > ki_pos + = ( u64 ) dio - > op . written < < 9 ;
dio - > written + = dio - > op . written ;
2019-10-09 19:50:39 +03:00
spin_lock ( & inode - > v . i_lock ) ;
2020-06-30 01:22:06 +03:00
if ( req - > ki_pos > inode - > v . i_size )
i_size_write ( & inode - > v , req - > ki_pos ) ;
2019-10-09 19:50:39 +03:00
spin_unlock ( & inode - > v . i_lock ) ;
2021-04-27 21:18:22 +03:00
bio_release_pages ( bio , false ) ;
2021-07-14 07:14:45 +03:00
bio - > bi_vcnt = 0 ;
2020-12-03 22:27:20 +03:00
if ( dio - > op . error ) {
set_bit ( EI_INODE_ERROR , & inode - > ei_flags ) ;
break ;
}
if ( ! dio - > iter . count )
2017-03-17 09:18:50 +03:00
break ;
2019-11-02 04:16:51 +03:00
2017-03-17 09:18:50 +03:00
bio_reset ( bio , NULL , REQ_OP_WRITE ) ;
2019-11-02 04:16:51 +03:00
reinit_completion ( & dio - > done ) ;
2017-03-17 09:18:50 +03:00
}
2020-06-30 01:22:06 +03:00
ret = dio - > op . error ? : ( ( long ) dio - > written < < 9 ) ;
2017-03-17 09:18:50 +03:00
err :
bch2_pagecache_block_put ( & inode - > ei_pagecache_lock ) ;
2019-09-22 22:02:05 +03:00
bch2_quota_reservation_put ( c , inode , & dio - > quota_res ) ;
2017-03-17 09:18:50 +03:00
if ( dio - > free_iov )
kfree ( dio - > iter . __iov ) ;
2021-07-14 07:14:45 +03:00
bio_release_pages ( bio , false ) ;
2017-03-17 09:18:50 +03:00
bio_put ( bio ) ;
/* inode->i_dio_count is our ref on inode and thus bch_fs */
inode_dio_end ( & inode - > v ) ;
if ( ! sync ) {
req - > ki_complete ( req , ret ) ;
ret = - EIOCBQUEUED ;
}
return ret ;
}
2019-11-02 04:16:51 +03:00
static void bch2_dio_write_loop_async ( struct bch_write_op * op )
2017-03-17 09:18:50 +03:00
{
2019-11-02 04:16:51 +03:00
struct dio_write * dio = container_of ( op , struct dio_write , op ) ;
2017-03-17 09:18:50 +03:00
2019-11-02 04:16:51 +03:00
if ( dio - > sync )
complete ( & dio - > done ) ;
else
bch2_dio_write_loop ( dio ) ;
2017-03-17 09:18:50 +03:00
}
/*
 * O_DIRECT write entry point.
 *
 * Performs the generic write checks under the inode lock, then sets up a
 * struct dio_write (embedded around a bio from c->dio_write_bioset), takes a
 * quota reservation, invalidates any cached pages in the range and hands off
 * to bch2_dio_write_loop(). The inode lock is dropped early unless the write
 * extends i_size; extending writes are also forced synchronous.
 *
 * Both the file position and the length must be multiples of the filesystem
 * block size, otherwise -EINVAL is returned.
 *
 * Returns bytes written, -EIOCBQUEUED for an async write in flight, 0 for a
 * zero-length write, or a negative error code.
 */
static noinline
ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
{
	struct file *file = req->ki_filp;
	struct address_space *mapping = file->f_mapping;
	struct bch_inode_info *inode = file_bch_inode(file);
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	struct dio_write *dio;
	struct bio *bio;
	bool locked = true, extending;
	ssize_t ret;

	prefetch(&c->opts);
	prefetch((void *) &c->opts + 64);
	prefetch(&inode->ei_inode);
	prefetch((void *) &inode->ei_inode + 64);

	inode_lock(&inode->v);

	ret = generic_write_checks(req, iter);
	if (unlikely(ret <= 0))
		goto err;

	ret = file_remove_privs(file);
	if (unlikely(ret))
		goto err;

	ret = file_update_time(file);
	if (unlikely(ret))
		goto err;

	/*
	 * ret is 0 at this point: without an explicit error code a misaligned
	 * request would be reported as a successful 0 byte write.
	 */
	if (unlikely((req->ki_pos|iter->count) & (block_bytes(c) - 1))) {
		ret = -EINVAL;
		goto err;
	}

	inode_dio_begin(&inode->v);
	bch2_pagecache_block_get(&inode->ei_pagecache_lock);

	/* Only writes that extend i_size keep holding the inode lock */
	extending = req->ki_pos + iter->count > inode->v.i_size;
	if (!extending) {
		inode_unlock(&inode->v);
		locked = false;
	}

	bio = bio_alloc_bioset(NULL,
			       iov_iter_is_bvec(iter)
			       ? 0
			       : iov_iter_npages(iter, BIO_MAX_VECS),
			       REQ_OP_WRITE,
			       GFP_KERNEL,
			       &c->dio_write_bioset);
	dio = container_of(bio, struct dio_write, op.wbio.bio);
	init_completion(&dio->done);
	dio->req		= req;
	dio->mm			= current->mm;
	dio->loop		= false;
	dio->sync		= is_sync_kiocb(req) || extending;
	dio->free_iov		= false;
	dio->quota_res.sectors	= 0;
	dio->written		= 0;
	dio->iter		= *iter;

	ret = bch2_quota_reservation_add(c, inode, &dio->quota_res,
					 iter->count >> 9, true);
	if (unlikely(ret))
		goto err_put_bio;

	/* Cached pages over the range must not survive a direct write */
	if (unlikely(mapping->nrpages)) {
		ret = write_invalidate_inode_pages_range(mapping,
						req->ki_pos,
						req->ki_pos + iter->count - 1);
		if (unlikely(ret))
			goto err_put_bio;
	}

	/* From here on bch2_dio_write_loop() owns and releases the dio */
	ret = bch2_dio_write_loop(dio);
err:
	if (locked)
		inode_unlock(&inode->v);
	return ret;
err_put_bio:
	bch2_pagecache_block_put(&inode->ei_pagecache_lock);
	bch2_quota_reservation_put(c, inode, &dio->quota_res);
	bio_put(bio);
	inode_dio_end(&inode->v);
	goto err;
}
2019-11-02 04:35:25 +03:00
ssize_t bch2_write_iter ( struct kiocb * iocb , struct iov_iter * from )
2017-03-17 09:18:50 +03:00
{
struct file * file = iocb - > ki_filp ;
2019-11-02 04:35:25 +03:00
struct bch_inode_info * inode = file_bch_inode ( file ) ;
2017-03-17 09:18:50 +03:00
ssize_t ret ;
if ( iocb - > ki_flags & IOCB_DIRECT )
return bch2_direct_write ( iocb , from ) ;
2019-11-02 04:35:25 +03:00
inode_lock ( & inode - > v ) ;
ret = generic_write_checks ( iocb , from ) ;
if ( ret < = 0 )
goto unlock ;
2017-03-17 09:18:50 +03:00
ret = file_remove_privs ( file ) ;
if ( ret )
2019-11-02 04:35:25 +03:00
goto unlock ;
2017-03-17 09:18:50 +03:00
ret = file_update_time ( file ) ;
if ( ret )
2019-11-02 04:35:25 +03:00
goto unlock ;
2017-03-17 09:18:50 +03:00
2019-11-02 04:35:25 +03:00
ret = bch2_buffered_write ( iocb , from ) ;
2017-03-17 09:18:50 +03:00
if ( likely ( ret > 0 ) )
iocb - > ki_pos + = ret ;
2019-11-02 04:35:25 +03:00
unlock :
2017-03-17 09:18:50 +03:00
inode_unlock ( & inode - > v ) ;
2019-11-02 04:35:25 +03:00
if ( ret > 0 )
2017-03-17 09:18:50 +03:00
ret = generic_write_sync ( iocb , ret ) ;
return ret ;
}
/* fsync: */
2021-11-05 22:17:13 +03:00
/*
 * Flush the journal up to the sequence number recorded in the inode.
 *
 * inode->ei_inode.bi_journal_seq won't be up to date since it's set in an
 * insert trigger, so the inode is looked up in the btree instead.
 *
 * Returns 0 right away if journal flushing is disabled by option, otherwise
 * the lookup error or the result of the journal flush.
 */
static int bch2_flush_inode(struct bch_fs *c, subvol_inum inum)
{
	struct bch_inode_unpacked inode;
	int ret;

	if (c->opts.journal_flush_disabled)
		return 0;

	ret = bch2_inode_find_by_inum(c, inum, &inode);

	return ret ?: bch2_journal_flush_seq(&c->journal, inode.bi_journal_seq);
}
2017-03-17 09:18:50 +03:00
2021-11-05 22:17:13 +03:00
/*
 * fsync: write back and wait on the data range, sync the inode metadata and
 * flush the journal for the inode. All three steps always run; the first
 * error encountered (in that order) is the one returned.
 */
int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct bch_inode_info *inode = file_bch_inode(file);
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	int data_err, meta_err, journal_err;

	data_err	= file_write_and_wait_range(file, start, end);
	meta_err	= sync_inode_metadata(&inode->v, 1);
	journal_err	= bch2_flush_inode(c, inode_inum(inode));

	if (data_err)
		return data_err;
	if (meta_err)
		return meta_err;
	return journal_err;
}
/* truncate: */
2021-03-16 07:28:17 +03:00
/*
 * Check whether any data extent starts before @end when walking the extents
 * btree from @start in the current snapshot of @subvol.
 *
 * Returns 1 if a data extent was found, 0 if not, or a negative error code.
 * Transaction restarts (-EINTR) are retried, resuming from the position the
 * iterator had reached.
 */
static inline int range_has_data(struct bch_fs *c, u32 subvol,
				 struct bpos start,
				 struct bpos end)
{
	struct btree_trans trans;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret = 0;

	bch2_trans_init(&trans, c, 0, 0);

	do {
		bch2_trans_begin(&trans);

		ret = bch2_subvolume_get_snapshot(&trans, subvol, &start.snapshot);
		if (ret)
			continue;	/* re-evaluates the -EINTR check below */

		for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents,
					     start, 0, k, ret) {
			if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
				break;

			if (bkey_extent_is_data(k.k)) {
				ret = 1;
				break;
			}
		}

		start = iter.pos;
		bch2_trans_iter_exit(&trans, &iter);
	} while (ret == -EINTR);

	bch2_trans_exit(&trans);

	return ret;
}
/*
 * Handle the partial-page edge of a truncate/punch of byte range
 * [start, end) for page @index: zero the covered bytes, mark fully covered
 * blocks unallocated in the page state, and redirty the page.
 *
 * Nothing is done (return 0) when the range does not partially cover this
 * page, when the page lies at or beyond i_size, or when the page isn't cached
 * and there is no data on disk for it.
 *
 * Returns a negative error code, or - once the page has been processed -
 * nonzero if the sector holding the last byte of the file within this page is
 * in state >= SECTOR_DIRTY, zero otherwise.
 */
static int __bch2_truncate_page(struct bch_inode_info *inode,
				pgoff_t index, loff_t start, loff_t end)
{
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	struct address_space *mapping = inode->v.i_mapping;
	struct bch_page_state *s;
	unsigned start_offset = start & (PAGE_SIZE - 1);
	unsigned end_offset = ((end - 1) & (PAGE_SIZE - 1)) + 1;
	bool partial_head = index == start >> PAGE_SHIFT && start_offset;
	bool partial_tail = index == end >> PAGE_SHIFT && end_offset != PAGE_SIZE;
	unsigned sector, first_sector, end_sector, eof_sector;
	struct page *page;
	s64 i_sectors_delta = 0;
	u64 bytes_in_page;
	int ret = 0;

	/* Page boundary? Nothing to do */
	if (!partial_head && !partial_tail)
		return 0;

	/* Above i_size? */
	if (index << PAGE_SHIFT >= inode->v.i_size)
		return 0;

	page = find_lock_page(mapping, index);
	if (!page) {
		/*
		 * XXX: we're doing two index lookups when we end up reading the
		 * page
		 */
		ret = range_has_data(c, inode->ei_subvol,
				POS(inode->v.i_ino, index << PAGE_SECTORS_SHIFT),
				POS(inode->v.i_ino, (index + 1) << PAGE_SECTORS_SHIFT));
		if (ret <= 0)
			return ret;

		page = find_or_create_page(mapping, index, GFP_KERNEL);
		if (unlikely(!page))
			return -ENOMEM;
	}

	s = bch2_page_state_create(page, 0);
	if (!s) {
		ret = -ENOMEM;
		goto unlock;
	}

	if (!PageUptodate(page)) {
		ret = bch2_read_single_page(page, mapping);
		if (ret)
			goto unlock;
	}

	/* Clamp the byte range to this page */
	if (index != start >> PAGE_SHIFT)
		start_offset = 0;
	if (index != end >> PAGE_SHIFT)
		end_offset = PAGE_SIZE;

	/* Only blocks that lie entirely inside the range become unallocated */
	first_sector	= round_up(start_offset, block_bytes(c)) >> 9;
	end_sector	= round_down(end_offset, block_bytes(c)) >> 9;

	for (sector = first_sector; sector < end_sector; sector++) {
		s->s[sector].nr_replicas = 0;
		if (s->s[sector].state == SECTOR_DIRTY)
			i_sectors_delta--;
		s->s[sector].state = SECTOR_UNALLOCATED;
	}

	i_sectors_acct(c, inode, NULL, i_sectors_delta);

	/*
	 * Caller needs to know whether this page will be written out by
	 * writeback - doing an i_size update if necessary - or whether it will
	 * be responsible for the i_size update:
	 */
	bytes_in_page = min_t(u64, inode->v.i_size - (index << PAGE_SHIFT),
			      PAGE_SIZE);
	eof_sector = (bytes_in_page - 1) >> 9;
	ret = s->s[eof_sector].state >= SECTOR_DIRTY;

	zero_user_segment(page, start_offset, end_offset);

	/*
	 * Bit of a hack - we don't want truncate to fail due to -ENOSPC.
	 *
	 * XXX: because we aren't currently tracking whether the page has actual
	 * data in it (vs. just 0s, or only partially written) this is wrong.
	 * ick.
	 */
	BUG_ON(bch2_get_page_disk_reservation(c, inode, page, false));

	/*
	 * This removes any writeable userspace mappings; we need to force
	 * .page_mkwrite to be called again before any mmapped writes, to
	 * redirty the full page:
	 */
	page_mkclean(page);
	filemap_dirty_folio(mapping, page_folio(page));
unlock:
	unlock_page(page);
	put_page(page);
	return ret;
}
/*
 * Truncate helper: process the page containing byte offset @from, covering
 * the range from @from up to the next page boundary.
 * Return value is that of __bch2_truncate_page().
 */
static int bch2_truncate_page(struct bch_inode_info *inode, loff_t from)
{
	pgoff_t index = from >> PAGE_SHIFT;
	loff_t page_end = round_up(from, PAGE_SIZE);

	return __bch2_truncate_page(inode, index, from, page_end);
}
2021-11-06 20:39:42 +03:00
/*
 * Process the partial pages at both edges of byte range [start, end): first
 * the page containing @start, then - if it is a different page and the first
 * call did not fail - the page containing @end.
 *
 * Returns the result of the last __bch2_truncate_page() call made.
 */
static int bch2_truncate_pages(struct bch_inode_info *inode,
			       loff_t start, loff_t end)
{
	loff_t first_index = start >> PAGE_SHIFT;
	loff_t last_index = end >> PAGE_SHIFT;
	int ret;

	ret = __bch2_truncate_page(inode, first_index, start, end);
	if (ret < 0 || first_index == last_index)
		return ret;

	return __bch2_truncate_page(inode, last_index, start, end);
}
2021-06-15 05:29:54 +03:00
static int bch2_extend ( struct mnt_idmap * idmap ,
struct bch_inode_info * inode ,
2019-10-09 18:12:48 +03:00
struct bch_inode_unpacked * inode_u ,
struct iattr * iattr )
2017-03-17 09:18:50 +03:00
{
struct address_space * mapping = inode - > v . i_mapping ;
int ret ;
2019-10-09 18:12:48 +03:00
/*
* sync appends :
2019-10-09 19:11:00 +03:00
*
* this has to be done _before_ extending i_size :
2019-10-09 18:12:48 +03:00
*/
ret = filemap_write_and_wait_range ( mapping , inode_u - > bi_size , S64_MAX ) ;
2017-03-17 09:18:50 +03:00
if ( ret )
return ret ;
truncate_setsize ( & inode - > v , iattr - > ia_size ) ;
2021-06-15 05:29:54 +03:00
return bch2_setattr_nonsize ( idmap , inode , iattr ) ;
2017-03-17 09:18:50 +03:00
}
2018-08-09 04:09:31 +03:00
static int bch2_truncate_finish_fn ( struct bch_inode_info * inode ,
struct bch_inode_unpacked * bi ,
void * p )
{
bi - > bi_flags & = ~ BCH_INODE_I_SIZE_DIRTY ;
return 0 ;
}
static int bch2_truncate_start_fn ( struct bch_inode_info * inode ,
struct bch_inode_unpacked * bi , void * p )
{
u64 * new_i_size = p ;
bi - > bi_flags | = BCH_INODE_I_SIZE_DIRTY ;
bi - > bi_size = * new_i_size ;
return 0 ;
}
2021-06-15 05:29:54 +03:00
int bch2_truncate ( struct mnt_idmap * idmap ,
struct bch_inode_info * inode , struct iattr * iattr )
2017-03-17 09:18:50 +03:00
{
struct bch_fs * c = inode - > v . i_sb - > s_fs_info ;
struct address_space * mapping = inode - > v . i_mapping ;
2019-10-09 18:12:48 +03:00
struct bch_inode_unpacked inode_u ;
2018-08-09 04:09:31 +03:00
u64 new_i_size = iattr - > ia_size ;
2019-10-10 19:47:22 +03:00
s64 i_sectors_delta = 0 ;
2017-03-17 09:18:50 +03:00
int ret = 0 ;
2021-06-15 05:29:54 +03:00
/*
2021-06-28 03:54:34 +03:00
* If the truncate call with change the size of the file , the
* cmtimes should be updated . If the size will not change , we
* do not need to update the cmtimes .
2021-06-15 05:29:54 +03:00
*/
2021-06-28 03:54:34 +03:00
if ( iattr - > ia_size ! = inode - > v . i_size ) {
if ( ! ( iattr - > ia_valid & ATTR_MTIME ) )
ktime_get_coarse_real_ts64 ( & iattr - > ia_mtime ) ;
if ( ! ( iattr - > ia_valid & ATTR_CTIME ) )
ktime_get_coarse_real_ts64 ( & iattr - > ia_ctime ) ;
iattr - > ia_valid | = ATTR_MTIME | ATTR_CTIME ;
}
2021-06-15 05:29:54 +03:00
2017-03-17 09:18:50 +03:00
inode_dio_wait ( & inode - > v ) ;
bch2_pagecache_block_get ( & inode - > ei_pagecache_lock ) ;
2021-03-16 07:28:17 +03:00
ret = bch2_inode_find_by_inum ( c , inode_inum ( inode ) , & inode_u ) ;
2019-12-18 21:18:33 +03:00
if ( ret )
goto err ;
/*
* check this before next assertion ; on filesystem error our normal
* invariants are a bit broken ( truncate has to truncate the page cache
* before the inode ) .
*/
ret = bch2_journal_error ( & c - > journal ) ;
2019-10-09 18:12:48 +03:00
if ( ret )
goto err ;
2017-03-17 09:18:50 +03:00
2020-12-03 22:27:20 +03:00
WARN_ON ( ! test_bit ( EI_INODE_ERROR , & inode - > ei_flags ) & &
inode - > v . i_size < inode_u . bi_size ) ;
2017-03-17 09:18:50 +03:00
2019-10-09 18:12:48 +03:00
if ( iattr - > ia_size > inode - > v . i_size ) {
2021-06-15 05:29:54 +03:00
ret = bch2_extend ( idmap , inode , & inode_u , iattr ) ;
2018-08-09 04:09:31 +03:00
goto err ;
2017-03-17 09:18:50 +03:00
}
2021-06-15 05:29:54 +03:00
iattr - > ia_valid & = ~ ATTR_SIZE ;
2017-03-17 09:18:50 +03:00
ret = bch2_truncate_page ( inode , iattr - > ia_size ) ;
2021-11-06 20:39:42 +03:00
if ( unlikely ( ret < 0 ) )
2018-08-09 04:09:31 +03:00
goto err ;
2017-03-17 09:18:50 +03:00
2019-09-20 01:05:04 +03:00
/*
* When extending , we ' re going to write the new i_size to disk
* immediately so we need to flush anything above the current on disk
* i_size first :
*
* Also , when extending we need to flush the page that i_size currently
* straddles - if it ' s mapped to userspace , we need to ensure that
* userspace has to redirty it and call . mkwrite - > set_page_dirty
* again to allocate the part of the page that was extended .
*/
2019-10-09 18:12:48 +03:00
if ( iattr - > ia_size > inode_u . bi_size )
2017-03-17 09:18:50 +03:00
ret = filemap_write_and_wait_range ( mapping ,
2019-10-09 18:12:48 +03:00
inode_u . bi_size ,
2017-03-17 09:18:50 +03:00
iattr - > ia_size - 1 ) ;
else if ( iattr - > ia_size & ( PAGE_SIZE - 1 ) )
ret = filemap_write_and_wait_range ( mapping ,
round_down ( iattr - > ia_size , PAGE_SIZE ) ,
iattr - > ia_size - 1 ) ;
if ( ret )
2018-08-09 04:09:31 +03:00
goto err ;
2017-03-17 09:18:50 +03:00
2018-08-09 04:09:31 +03:00
mutex_lock ( & inode - > ei_update_lock ) ;
ret = bch2_write_inode ( c , inode , bch2_truncate_start_fn ,
& new_i_size , 0 ) ;
mutex_unlock ( & inode - > ei_update_lock ) ;
2017-03-17 09:18:50 +03:00
if ( unlikely ( ret ) )
2018-08-09 04:09:31 +03:00
goto err ;
2017-03-17 09:18:50 +03:00
truncate_setsize ( & inode - > v , iattr - > ia_size ) ;
2021-03-13 04:30:39 +03:00
ret = bch2_fpunch ( c , inode_inum ( inode ) ,
2019-08-06 18:19:58 +03:00
round_up ( iattr - > ia_size , block_bytes ( c ) ) > > 9 ,
2021-11-05 22:17:13 +03:00
U64_MAX , & i_sectors_delta ) ;
2019-10-10 19:47:22 +03:00
i_sectors_acct ( c , inode , NULL , i_sectors_delta ) ;
2021-11-24 02:21:09 +03:00
BUG_ON ( ! inode - > v . i_size & & inode - > v . i_blocks ) ;
2017-03-17 09:18:50 +03:00
if ( unlikely ( ret ) )
2018-08-09 04:09:31 +03:00
goto err ;
2017-03-17 09:18:50 +03:00
2018-08-09 04:09:31 +03:00
mutex_lock ( & inode - > ei_update_lock ) ;
2021-06-15 05:29:54 +03:00
ret = bch2_write_inode ( c , inode , bch2_truncate_finish_fn , NULL , 0 ) ;
2018-08-09 04:09:31 +03:00
mutex_unlock ( & inode - > ei_update_lock ) ;
2021-06-15 05:29:54 +03:00
ret = bch2_setattr_nonsize ( idmap , inode , iattr ) ;
2018-08-09 04:09:31 +03:00
err :
2017-03-17 09:18:50 +03:00
bch2_pagecache_block_put ( & inode - > ei_pagecache_lock ) ;
return ret ;
}
/* fallocate: */
2021-04-29 02:36:12 +03:00
static int inode_update_times_fn ( struct bch_inode_info * inode ,
struct bch_inode_unpacked * bi , void * p )
{
struct bch_fs * c = inode - > v . i_sb - > s_fs_info ;
bi - > bi_mtime = bi - > bi_ctime = bch2_current_time ( c ) ;
return 0 ;
}
2019-10-10 19:47:22 +03:00
/*
 * Punch a hole over byte range [offset, offset + len).
 *
 * The partial pages at either edge are zeroed via bch2_truncate_pages(), the
 * page cache over the range is dropped, and the whole blocks inside the range
 * are deleted from the extents btree. Afterwards the inode is updated: either
 * its size is rewritten (hole reaches i_size and the last page won't be taken
 * care of by writeback) or just its timestamps.
 *
 * Returns 0 on success or a negative error code. A failure to delete the
 * extents takes precedence over the result of the inode update; the inode
 * update is still attempted in that case, since part of the range may already
 * have been punched.
 */
static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len)
{
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	u64 end		= offset + len;
	u64 block_start	= round_up(offset, block_bytes(c));
	u64 block_end	= round_down(end, block_bytes(c));
	bool truncated_last_page;
	int ret = 0, inode_ret;

	ret = bch2_truncate_pages(inode, offset, end);
	if (unlikely(ret < 0))
		return ret;

	/* > 0 is a flag from bch2_truncate_pages(), not an error */
	truncated_last_page = ret;
	ret = 0;

	truncate_pagecache_range(&inode->v, offset, end - 1);

	if (block_start < block_end) {
		s64 i_sectors_delta = 0;

		ret = bch2_fpunch(c, inode_inum(inode),
				  block_start >> 9, block_end >> 9,
				  &i_sectors_delta);
		i_sectors_acct(c, inode, NULL, i_sectors_delta);
	}

	mutex_lock(&inode->ei_update_lock);
	if (end >= inode->v.i_size && !truncated_last_page) {
		inode_ret = bch2_write_inode_size(c, inode, inode->v.i_size,
						  ATTR_MTIME|ATTR_CTIME);
	} else {
		inode_ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL,
					     ATTR_MTIME|ATTR_CTIME);
	}
	mutex_unlock(&inode->ei_update_lock);

	/* Don't let a successful inode update hide a failed punch */
	return ret ?: inode_ret;
}
2019-10-10 19:47:22 +03:00
static long bchfs_fcollapse_finsert ( struct bch_inode_info * inode ,
2019-09-08 01:04:23 +03:00
loff_t offset , loff_t len ,
bool insert )
2017-03-17 09:18:50 +03:00
{
struct bch_fs * c = inode - > v . i_sb - > s_fs_info ;
struct address_space * mapping = inode - > v . i_mapping ;
2020-12-17 23:08:58 +03:00
struct bkey_buf copy ;
2018-07-13 02:19:41 +03:00
struct btree_trans trans ;
2021-08-30 22:18:31 +03:00
struct btree_iter src , dst , del ;
2019-09-08 01:04:23 +03:00
loff_t shift , new_size ;
u64 src_start ;
2021-03-20 03:29:11 +03:00
int ret = 0 ;
2017-03-17 09:18:50 +03:00
if ( ( offset | len ) & ( block_bytes ( c ) - 1 ) )
return - EINVAL ;
2019-09-08 01:04:23 +03:00
if ( insert ) {
if ( inode - > v . i_sb - > s_maxbytes - inode - > v . i_size < len )
2021-11-06 20:39:42 +03:00
return - EFBIG ;
2017-03-17 09:18:50 +03:00
2019-09-08 01:04:23 +03:00
if ( offset > = inode - > v . i_size )
2021-11-06 20:39:42 +03:00
return - EINVAL ;
2017-03-17 09:18:50 +03:00
2019-09-08 01:04:23 +03:00
src_start = U64_MAX ;
shift = len ;
} else {
if ( offset + len > = inode - > v . i_size )
2021-11-06 20:39:42 +03:00
return - EINVAL ;
2017-03-17 09:18:50 +03:00
2019-09-08 01:04:23 +03:00
src_start = offset + len ;
shift = - len ;
}
new_size = inode - > v . i_size + shift ;
2017-03-17 09:18:50 +03:00
2019-09-08 01:04:23 +03:00
ret = write_invalidate_inode_pages_range ( mapping , offset , LLONG_MAX ) ;
2019-07-22 20:37:02 +03:00
if ( ret )
2021-11-06 20:39:42 +03:00
return ret ;
2019-07-22 20:37:02 +03:00
2019-09-08 01:04:23 +03:00
if ( insert ) {
i_size_write ( & inode - > v , new_size ) ;
mutex_lock ( & inode - > ei_update_lock ) ;
ret = bch2_write_inode_size ( c , inode , new_size ,
ATTR_MTIME | ATTR_CTIME ) ;
mutex_unlock ( & inode - > ei_update_lock ) ;
} else {
2019-10-10 19:47:22 +03:00
s64 i_sectors_delta = 0 ;
2021-03-13 04:30:39 +03:00
ret = bch2_fpunch ( c , inode_inum ( inode ) ,
2019-10-10 19:47:22 +03:00
offset > > 9 , ( offset + len ) > > 9 ,
& i_sectors_delta ) ;
i_sectors_acct ( c , inode , NULL , i_sectors_delta ) ;
2019-09-08 01:04:23 +03:00
if ( ret )
2021-11-06 20:39:42 +03:00
return ret ;
2019-09-08 01:04:23 +03:00
}
2018-08-12 00:26:11 +03:00
2021-03-20 03:29:11 +03:00
bch2_bkey_buf_init ( & copy ) ;
2021-06-03 06:31:42 +03:00
bch2_trans_init ( & trans , c , BTREE_ITER_MAX , 1024 ) ;
2021-08-30 22:18:31 +03:00
bch2_trans_iter_init ( & trans , & src , BTREE_ID_extents ,
2019-09-08 01:04:23 +03:00
POS ( inode - > v . i_ino , src_start > > 9 ) ,
2019-07-22 20:37:02 +03:00
BTREE_ITER_INTENT ) ;
2021-08-30 22:18:31 +03:00
bch2_trans_copy_iter ( & dst , & src ) ;
bch2_trans_copy_iter ( & del , & src ) ;
2019-09-08 01:04:23 +03:00
2021-03-20 03:29:11 +03:00
while ( ret = = 0 | | ret = = - EINTR ) {
2019-07-22 20:37:02 +03:00
struct disk_reservation disk_res =
bch2_disk_reservation_init ( c , 0 ) ;
struct bkey_i delete ;
struct bkey_s_c k ;
struct bpos next_pos ;
2019-09-08 01:04:23 +03:00
struct bpos move_pos = POS ( inode - > v . i_ino , offset > > 9 ) ;
struct bpos atomic_end ;
2020-01-01 00:17:42 +03:00
unsigned trigger_flags = 0 ;
2021-03-16 07:28:17 +03:00
u32 snapshot ;
bch2_trans_begin ( & trans ) ;
ret = bch2_subvolume_get_snapshot ( & trans ,
inode - > ei_subvol , & snapshot ) ;
if ( ret )
continue ;
bch2_btree_iter_set_snapshot ( & src , snapshot ) ;
bch2_btree_iter_set_snapshot ( & dst , snapshot ) ;
bch2_btree_iter_set_snapshot ( & del , snapshot ) ;
2017-03-17 09:18:50 +03:00
2021-07-25 03:24:10 +03:00
bch2_trans_begin ( & trans ) ;
2019-09-08 01:04:23 +03:00
k = insert
2021-08-30 22:18:31 +03:00
? bch2_btree_iter_peek_prev ( & src )
: bch2_btree_iter_peek ( & src ) ;
2019-07-22 20:37:02 +03:00
if ( ( ret = bkey_err ( k ) ) )
2021-03-20 03:29:11 +03:00
continue ;
2018-08-12 00:26:11 +03:00
2019-07-22 20:37:02 +03:00
if ( ! k . k | | k . k - > p . inode ! = inode - > v . i_ino )
break ;
2017-03-17 09:18:50 +03:00
2019-09-08 01:04:23 +03:00
if ( insert & &
bkey_cmp ( k . k - > p , POS ( inode - > v . i_ino , offset > > 9 ) ) < = 0 )
break ;
reassemble :
2020-12-17 23:08:58 +03:00
bch2_bkey_buf_reassemble ( & copy , c , k ) ;
2019-09-08 01:04:23 +03:00
if ( insert & &
2020-04-01 23:07:57 +03:00
bkey_cmp ( bkey_start_pos ( k . k ) , move_pos ) < 0 )
2019-11-10 00:01:15 +03:00
bch2_cut_front ( move_pos , copy . k ) ;
2017-03-17 09:18:50 +03:00
2019-11-10 00:01:15 +03:00
copy . k - > k . p . offset + = shift > > 9 ;
2021-08-30 22:18:31 +03:00
bch2_btree_iter_set_pos ( & dst , bkey_start_pos ( & copy . k - > k ) ) ;
2017-03-17 09:18:50 +03:00
2021-08-30 22:18:31 +03:00
ret = bch2_extent_atomic_end ( & trans , & dst , copy . k , & atomic_end ) ;
2019-08-16 16:58:07 +03:00
if ( ret )
2021-03-20 03:29:11 +03:00
continue ;
2018-08-06 00:46:41 +03:00
2019-11-10 00:01:15 +03:00
if ( bkey_cmp ( atomic_end , copy . k - > k . p ) ) {
2019-09-08 01:04:23 +03:00
if ( insert ) {
move_pos = atomic_end ;
move_pos . offset - = shift > > 9 ;
goto reassemble ;
} else {
2019-11-10 03:02:48 +03:00
bch2_cut_back ( atomic_end , copy . k ) ;
2019-09-08 01:04:23 +03:00
}
}
2019-07-22 20:37:02 +03:00
bkey_init ( & delete . k ) ;
2020-04-01 23:07:57 +03:00
delete . k . p = copy . k - > k . p ;
delete . k . size = copy . k - > k . size ;
delete . k . p . offset - = shift > > 9 ;
2021-08-30 22:18:31 +03:00
bch2_btree_iter_set_pos ( & del , bkey_start_pos ( & delete . k ) ) ;
2017-03-17 09:18:50 +03:00
2019-09-08 01:04:23 +03:00
next_pos = insert ? bkey_start_pos ( & delete . k ) : delete . k . p ;
2017-03-17 09:18:50 +03:00
2019-11-10 00:01:15 +03:00
if ( copy . k - > k . size = = k . k - > size ) {
2019-07-22 20:37:02 +03:00
/*
* If we ' re moving the entire extent , we can skip
* running triggers :
*/
2020-01-01 00:17:42 +03:00
trigger_flags | = BTREE_TRIGGER_NORUN ;
2019-07-22 20:37:02 +03:00
} else {
/* We might end up splitting compressed extents: */
unsigned nr_ptrs =
2019-11-17 00:25:58 +03:00
bch2_bkey_nr_ptrs_allocated ( bkey_i_to_s_c ( copy . k ) ) ;
2019-07-22 20:37:02 +03:00
ret = bch2_disk_reservation_get ( c , & disk_res ,
2019-11-10 00:01:15 +03:00
copy . k - > k . size , nr_ptrs ,
2019-07-22 20:37:02 +03:00
BCH_DISK_RESERVATION_NOFAIL ) ;
BUG_ON ( ret ) ;
}
2021-08-30 22:18:31 +03:00
ret = bch2_btree_iter_traverse ( & del ) ? :
bch2_trans_update ( & trans , & del , & delete , trigger_flags ) ? :
bch2_trans_update ( & trans , & dst , copy . k , trigger_flags ) ? :
2021-11-05 22:17:13 +03:00
bch2_trans_commit ( & trans , & disk_res , NULL ,
2020-01-01 03:37:10 +03:00
BTREE_INSERT_NOFAIL ) ;
2017-03-17 09:18:50 +03:00
bch2_disk_reservation_put ( c , & disk_res ) ;
2021-03-20 03:29:11 +03:00
2019-07-22 20:37:02 +03:00
if ( ! ret )
2021-08-30 22:18:31 +03:00
bch2_btree_iter_set_pos ( & src , next_pos ) ;
2017-03-17 09:18:50 +03:00
}
2021-08-30 22:18:31 +03:00
bch2_trans_iter_exit ( & trans , & del ) ;
bch2_trans_iter_exit ( & trans , & dst ) ;
bch2_trans_iter_exit ( & trans , & src ) ;
2021-03-20 03:29:11 +03:00
bch2_trans_exit ( & trans ) ;
bch2_bkey_buf_exit ( & copy , c ) ;
if ( ret )
2021-11-06 20:39:42 +03:00
return ret ;
2017-03-17 09:18:50 +03:00
2021-11-06 20:39:42 +03:00
mutex_lock ( & inode - > ei_update_lock ) ;
2019-09-08 01:04:23 +03:00
if ( ! insert ) {
i_size_write ( & inode - > v , new_size ) ;
ret = bch2_write_inode_size ( c , inode , new_size ,
ATTR_MTIME | ATTR_CTIME ) ;
2021-11-06 20:39:42 +03:00
} else {
/* We need an inode update to update bi_journal_seq for fsync: */
ret = bch2_write_inode ( c , inode , inode_update_times_fn , NULL ,
ATTR_MTIME | ATTR_CTIME ) ;
2019-09-08 01:04:23 +03:00
}
2021-11-06 20:39:42 +03:00
mutex_unlock ( & inode - > ei_update_lock ) ;
2017-03-17 09:18:50 +03:00
return ret ;
}
2021-04-17 03:35:20 +03:00
/*
 * __bchfs_fallocate - insert reservation keys over a sector range of @inode
 * @inode:		inode being preallocated
 * @mode:		fallocate mode bits; only FALLOC_FL_ZERO_RANGE is examined here
 * @start_sector:	first sector of the range
 * @end_sector:	end of the range; the walk stops once iter.pos reaches it
 *
 * Walks the extents btree slot by slot from @start_sector and writes a
 * KEY_TYPE_reservation key over every slot that is not already reserved with
 * at least the inode's data_replicas, and (unless FALLOC_FL_ZERO_RANGE is set)
 * is not a data extent.
 *
 * Returns 0 on success or a negative error code. -EINTR from inside the loop
 * is reset to 0 at the bottom of each iteration, so the loop condition is
 * evaluated again rather than the error being returned.
 */
static int __bchfs_fallocate ( struct bch_inode_info * inode , int mode ,
u64 start_sector , u64 end_sector )
2017-03-17 09:18:50 +03:00
{
struct bch_fs * c = inode - > v . i_sb - > s_fs_info ;
2018-08-06 00:48:00 +03:00
struct btree_trans trans ;
2021-08-30 22:18:31 +03:00
struct btree_iter iter ;
2021-04-17 03:35:20 +03:00
struct bpos end_pos = POS ( inode - > v . i_ino , end_sector ) ;
2019-10-09 19:50:39 +03:00
unsigned replicas = io_opts ( c , & inode - > ei_inode ) . data_replicas ;
2021-04-17 03:35:20 +03:00
int ret = 0 ;
2017-03-17 09:18:50 +03:00
2021-06-03 06:31:42 +03:00
bch2_trans_init ( & trans , c , BTREE_ITER_MAX , 512 ) ;
2017-03-17 09:18:50 +03:00
2021-08-30 22:18:31 +03:00
bch2_trans_iter_init ( & trans , & iter , BTREE_ID_extents ,
2021-04-17 03:35:20 +03:00
POS ( inode - > v . i_ino , start_sector ) ,
2018-08-06 00:48:00 +03:00
BTREE_ITER_SLOTS | BTREE_ITER_INTENT ) ;
2017-03-17 09:18:50 +03:00
2021-08-30 22:18:31 +03:00
/* One pass per slot, until an error sticks or the iterator reaches end_pos. */
while ( ! ret & & bkey_cmp ( iter . pos , end_pos ) < 0 ) {
2019-10-10 19:47:22 +03:00
s64 i_sectors_delta = 0 ;
2017-03-17 09:18:50 +03:00
struct disk_reservation disk_res = { 0 } ;
2018-08-06 00:48:00 +03:00
struct quota_res quota_res = { 0 } ;
2017-03-17 09:18:50 +03:00
struct bkey_i_reservation reservation ;
struct bkey_s_c k ;
2021-04-17 03:35:20 +03:00
unsigned sectors ;
2021-03-16 07:28:17 +03:00
u32 snapshot ;
2017-03-17 09:18:50 +03:00
2020-02-26 23:39:46 +03:00
bch2_trans_begin ( & trans ) ;
2019-12-21 00:35:24 +03:00
2021-03-16 07:28:17 +03:00
/* The snapshot ID is looked up again on every pass and applied to the iterator. */
ret = bch2_subvolume_get_snapshot ( & trans ,
inode - > ei_subvol , & snapshot ) ;
if ( ret )
goto bkey_err ;
bch2_btree_iter_set_snapshot ( & iter , snapshot ) ;
2021-08-30 22:18:31 +03:00
k = bch2_btree_iter_peek_slot ( & iter ) ;
2019-03-28 05:03:30 +03:00
if ( ( ret = bkey_err ( k ) ) )
goto bkey_err ;
2017-03-17 09:18:50 +03:00
/* already reserved */
2018-11-01 22:10:01 +03:00
if ( k . k - > type = = KEY_TYPE_reservation & &
2017-03-17 09:18:50 +03:00
bkey_s_c_to_reservation ( k ) . v - > nr_replicas > = replicas ) {
2021-08-30 22:18:31 +03:00
bch2_btree_iter_advance ( & iter ) ;
2017-03-17 09:18:50 +03:00
continue ;
}
2018-08-06 00:48:00 +03:00
/* Existing data is left alone unless the caller asked for ZERO_RANGE. */
if ( bkey_extent_is_data ( k . k ) & &
! ( mode & FALLOC_FL_ZERO_RANGE ) ) {
2021-08-30 22:18:31 +03:00
bch2_btree_iter_advance ( & iter ) ;
2018-08-06 00:48:00 +03:00
continue ;
2017-03-17 09:18:50 +03:00
}
/*
 * Build a reservation key with the same position and size as the key in
 * this slot, then trim it to start at iter.pos and end no later than
 * end_pos.
 */
bkey_reservation_init ( & reservation . k_i ) ;
2018-11-01 22:10:01 +03:00
reservation . k . type = KEY_TYPE_reservation ;
2017-03-17 09:18:50 +03:00
reservation . k . p = k . k - > p ;
reservation . k . size = k . k - > size ;
2021-08-30 22:18:31 +03:00
bch2_cut_front ( iter . pos , & reservation . k_i ) ;
2019-11-10 03:02:48 +03:00
bch2_cut_back ( end_pos , & reservation . k_i ) ;
2017-03-17 09:18:50 +03:00
sectors = reservation . k . size ;
2019-11-17 00:25:58 +03:00
reservation . v . nr_replicas = bch2_bkey_nr_ptrs_allocated ( k ) ;
2017-03-17 09:18:50 +03:00
/* Quota is only charged when the existing key is not already an allocation. */
if ( ! bkey_extent_is_allocation ( k . k ) ) {
ret = bch2_quota_reservation_add ( c , inode ,
2018-08-06 00:48:00 +03:00
& quota_res ,
2017-03-17 09:18:50 +03:00
sectors , true ) ;
if ( unlikely ( ret ) )
2019-03-28 05:03:30 +03:00
goto bkey_err ;
2017-03-17 09:18:50 +03:00
}
/*
 * A fresh disk reservation is taken when the existing key has fewer
 * allocated pointers than requested replicas, or when it reports
 * compressed sectors; nr_replicas then comes from that reservation.
 */
if ( reservation . v . nr_replicas < replicas | |
2019-11-17 00:25:58 +03:00
bch2_bkey_sectors_compressed ( k ) ) {
2017-03-17 09:18:50 +03:00
ret = bch2_disk_reservation_get ( c , & disk_res , sectors ,
replicas , 0 ) ;
if ( unlikely ( ret ) )
2019-03-28 05:03:30 +03:00
goto bkey_err ;
2017-03-17 09:18:50 +03:00
reservation . v . nr_replicas = disk_res . nr_replicas ;
}
2021-03-13 04:30:39 +03:00
ret = bch2_extent_update ( & trans , inode_inum ( inode ) , & iter ,
& reservation . k_i ,
2021-11-05 22:17:13 +03:00
& disk_res , NULL ,
2021-05-20 22:49:23 +03:00
0 , & i_sectors_delta , true ) ;
2021-11-22 06:34:26 +03:00
if ( ret )
goto bkey_err ;
2019-10-10 19:47:22 +03:00
i_sectors_acct ( c , inode , & quota_res , i_sectors_delta ) ;
2019-03-28 05:03:30 +03:00
/*
 * Per-iteration cleanup, reached on both success and failure: both
 * reservations are put, and -EINTR is cleared so the loop keeps going.
 */
bkey_err :
2018-08-06 00:48:00 +03:00
bch2_quota_reservation_put ( c , inode , & quota_res ) ;
2017-03-17 09:18:50 +03:00
bch2_disk_reservation_put ( c , & disk_res ) ;
if ( ret = = - EINTR )
ret = 0 ;
}
2021-11-06 20:39:42 +03:00
2021-11-24 02:21:09 +03:00
bch2_trans_unlock ( & trans ) ; /* lock ordering, before taking pagecache locks: */
/* Only the part actually walked, [start_sector, iter.pos.offset), is marked. */
mark_pagecache_reserved ( inode , start_sector , iter . pos . offset ) ;
2021-11-06 20:39:42 +03:00
/*
 * ZERO_RANGE that ran out of space: punch from the iterator's current
 * position up to end_sector. ret is left as -ENOSPC for the caller.
 * NOTE(review): the return value of bch2_fpunch_at() is not checked here;
 * confirm that is intentional.
 */
if ( ret = = - ENOSPC & & ( mode & FALLOC_FL_ZERO_RANGE ) ) {
struct quota_res quota_res = { 0 } ;
s64 i_sectors_delta = 0 ;
bch2_fpunch_at ( & trans , & iter , inode_inum ( inode ) ,
end_sector , & i_sectors_delta ) ;
i_sectors_acct ( c , inode , & quota_res , i_sectors_delta ) ;
bch2_quota_reservation_put ( c , inode , & quota_res ) ;
}
2021-08-30 22:18:31 +03:00
bch2_trans_iter_exit ( & trans , & iter ) ;
2021-04-17 03:35:20 +03:00
bch2_trans_exit ( & trans ) ;
return ret ;
}
2021-03-20 03:29:11 +03:00
2021-04-17 03:35:20 +03:00
/*
 * bchfs_fallocate - plain and ZERO_RANGE fallocate for one inode
 * @inode:	inode to operate on
 * @mode:	FALLOC_FL_KEEP_SIZE and/or FALLOC_FL_ZERO_RANGE
 * @offset:	byte offset of the start of the range
 * @len:	length of the range in bytes
 *
 * For ZERO_RANGE the partial blocks at either edge are handled through the
 * page cache and only the fully covered blocks are handed to
 * __bchfs_fallocate(); otherwise the range is widened to whole blocks.
 *
 * Returns the error from the reservation step if there was one, otherwise
 * the result of the inode size update (0 if none was needed).
 */
static long bchfs_fallocate(struct bch_inode_info *inode, int mode,
			    loff_t offset, loff_t len)
{
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	const bool zero_range = mode & FALLOC_FL_ZERO_RANGE;
	const bool keep_size = mode & FALLOC_FL_KEEP_SIZE;
	u64 end = offset + len;
	u64 block_start, block_end;
	bool truncated_last_page = false;
	bool grow_isize;
	int ret, ret2 = 0;

	if (!keep_size && end > inode->v.i_size) {
		ret = inode_newsize_ok(&inode->v, end);
		if (ret)
			return ret;
	}

	if (zero_range) {
		ret = bch2_truncate_pages(inode, offset, end);
		if (unlikely(ret < 0))
			return ret;

		truncated_last_page = ret;

		truncate_pagecache_range(&inode->v, offset, end - 1);

		/* Only blocks lying entirely inside the range get reserved. */
		block_start = round_up(offset, block_bytes(c));
		block_end = round_down(end, block_bytes(c));
	} else {
		/* Cover every block the range touches. */
		block_start = round_down(offset, block_bytes(c));
		block_end = round_up(end, block_bytes(c));
	}

	ret = __bchfs_fallocate(inode, mode, block_start >> 9, block_end >> 9);

	/*
	 * On -ENOSPC in ZERO_RANGE mode, we still want to do the inode update,
	 * so that the VFS cache i_size is consistent with the btree i_size:
	 */
	if (ret && (ret != -ENOSPC || !zero_range))
		return ret;

	if (keep_size && end > inode->v.i_size)
		end = inode->v.i_size;

	grow_isize = end >= inode->v.i_size &&
		((zero_range && !truncated_last_page) || !keep_size);

	if (grow_isize) {
		spin_lock(&inode->v.i_lock);
		i_size_write(&inode->v, end);
		spin_unlock(&inode->v.i_lock);

		mutex_lock(&inode->ei_update_lock);
		ret2 = bch2_write_inode_size(c, inode, end, 0);
		mutex_unlock(&inode->ei_update_lock);
	}

	return ret ?: ret2;
}
/*
 * bch2_fallocate_dispatch - fallocate entry point; routes @mode to a handler
 * @file:	file being operated on
 * @mode:	fallocate mode bits
 * @offset:	byte offset of the start of the range
 * @len:	length of the range in bytes
 *
 * Takes a write reference on the filesystem (returning -EROFS if that is not
 * possible), then the inode lock and the pagecache block lock, waits for
 * in-flight direct I/O, and calls the handler matching @mode. Unsupported
 * mode combinations yield -EOPNOTSUPP.
 */
long bch2_fallocate_dispatch(struct file *file, int mode,
			     loff_t offset, loff_t len)
{
	struct bch_inode_info *inode = file_bch_inode(file);
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	long ret = -EOPNOTSUPP;

	if (!percpu_ref_tryget(&c->writes))
		return -EROFS;

	inode_lock(&inode->v);
	inode_dio_wait(&inode->v);
	bch2_pagecache_block_get(&inode->ei_pagecache_lock);

	if ((mode & ~(FALLOC_FL_KEEP_SIZE|FALLOC_FL_ZERO_RANGE)) == 0) {
		/* Any combination of KEEP_SIZE and ZERO_RANGE, including none. */
		ret = bchfs_fallocate(inode, mode, offset, len);
	} else {
		switch (mode) {
		case FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE:
			ret = bchfs_fpunch(inode, offset, len);
			break;
		case FALLOC_FL_INSERT_RANGE:
		case FALLOC_FL_COLLAPSE_RANGE:
			ret = bchfs_fcollapse_finsert(inode, offset, len,
					mode == FALLOC_FL_INSERT_RANGE);
			break;
		default:
			/* ret already holds -EOPNOTSUPP */
			break;
		}
	}

	bch2_pagecache_block_put(&inode->ei_pagecache_lock);
	inode_unlock(&inode->v);
	percpu_ref_put(&c->writes);

	return ret;
}
2019-08-16 16:59:56 +03:00
/*
 * bch2_remap_file_range - remap (reflink) a byte range between two files
 * @file_src:		source file
 * @pos_src:		byte offset in the source; must be block aligned
 * @file_dst:		destination file
 * @pos_dst:		byte offset in the destination; must be block aligned
 * @len:		number of bytes to remap
 * @remap_flags:	REMAP_FILE_* flags; REMAP_FILE_DEDUP is not supported
 *
 * Returns the number of bytes remapped (never more than @len), or a negative
 * error code:
 *   -EINVAL     unknown flags, unaligned offsets, or overlapping ranges
 *               within the same inode
 *   -EOPNOTSUPP REMAP_FILE_DEDUP was requested
 * plus whatever the helpers called below return.
 *
 * When the destination requires synchronous writes the inode is flushed
 * before returning. A flush failure is reported to the caller, but a
 * successful flush must not replace the byte count already computed.
 */
loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src,
			     struct file *file_dst, loff_t pos_dst,
			     loff_t len, unsigned remap_flags)
{
	struct bch_inode_info *src = file_bch_inode(file_src);
	struct bch_inode_info *dst = file_bch_inode(file_dst);
	struct bch_fs *c = src->v.i_sb->s_fs_info;
	s64 i_sectors_delta = 0;
	u64 aligned_len;
	loff_t ret = 0;

	if (remap_flags & ~(REMAP_FILE_DEDUP|REMAP_FILE_ADVISORY))
		return -EINVAL;

	if (remap_flags & REMAP_FILE_DEDUP)
		return -EOPNOTSUPP;

	if ((pos_src & (block_bytes(c) - 1)) ||
	    (pos_dst & (block_bytes(c) - 1)))
		return -EINVAL;

	/* Source and destination ranges may not overlap within one inode. */
	if (src == dst &&
	    abs(pos_src - pos_dst) < len)
		return -EINVAL;

	bch2_lock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst);

	file_update_time(file_dst);

	inode_dio_wait(&src->v);
	inode_dio_wait(&dst->v);

	/* May shorten len; a zero length means there is nothing to do. */
	ret = generic_remap_file_range_prep(file_src, pos_src,
					    file_dst, pos_dst,
					    &len, remap_flags);
	if (ret < 0 || len == 0)
		goto err;

	aligned_len = round_up((u64) len, block_bytes(c));

	ret = write_invalidate_inode_pages_range(dst->v.i_mapping,
				pos_dst, pos_dst + len - 1);
	if (ret)
		goto err;

	mark_pagecache_unallocated(src, pos_src >> 9,
				   (pos_src + aligned_len) >> 9);

	ret = bch2_remap_range(c,
			       inode_inum(dst), pos_dst >> 9,
			       inode_inum(src), pos_src >> 9,
			       aligned_len >> 9,
			       pos_dst + len, &i_sectors_delta);
	if (ret < 0)
		goto err;

	/*
	 * due to alignment, we might have remapped slightly more than requested
	 */
	ret = min((u64) ret << 9, (u64) len);

	/* XXX get a quota reservation */
	i_sectors_acct(c, dst, NULL, i_sectors_delta);

	spin_lock(&dst->v.i_lock);
	if (pos_dst + ret > dst->v.i_size)
		i_size_write(&dst->v, pos_dst + ret);
	spin_unlock(&dst->v.i_lock);

	if ((file_dst->f_flags & (__O_SYNC | O_DSYNC)) ||
	    IS_SYNC(file_inode(file_dst))) {
		int flush_ret = bch2_flush_inode(c, inode_inum(dst));

		/* Keep the byte count in ret unless the flush itself failed. */
		if (flush_ret)
			ret = flush_ret;
	}
err:
	bch2_unlock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst);

	return ret;
}
2017-03-17 09:18:50 +03:00
/* fseek: */
2019-07-30 20:49:17 +03:00
static int folio_data_offset ( struct folio * folio , unsigned offset )
2017-03-17 09:18:50 +03:00
{
2019-07-03 04:41:35 +03:00
struct bch_page_state * s = bch2_page_state ( & folio - > page ) ;
unsigned i ;
2019-07-30 20:49:17 +03:00
if ( s )
for ( i = offset > > 9 ; i < PAGE_SECTORS ; i + + )
if ( s - > s [ i ] . state > = SECTOR_DIRTY )
return i < < 9 ;
2018-11-15 05:53:40 +03:00
2019-07-30 20:49:17 +03:00
return - 1 ;
2017-03-17 09:18:50 +03:00
}
2019-07-30 20:49:17 +03:00
/*
 * Search the page cache of @vinode, from @start_offset up to @end_offset, for
 * the first position that folio_data_offset() reports as data.
 *
 * Returns that position clamped to [@start_offset, @end_offset], or
 * @end_offset when nothing is found.
 */
static loff_t bch2_seek_pagecache_data(struct inode *vinode,
				       loff_t start_offset,
				       loff_t end_offset)
{
	struct folio_batch batch;
	pgoff_t first_index	= start_offset >> PAGE_SHIFT;
	pgoff_t last_index	= end_offset >> PAGE_SHIFT;
	pgoff_t next_index	= first_index;

	folio_batch_init(&batch);

	while (filemap_get_folios(vinode->i_mapping,
				  &next_index, last_index, &batch)) {
		unsigned nr;

		for (nr = 0; nr < folio_batch_count(&batch); nr++) {
			struct folio *f = batch.folios[nr];
			unsigned skip = 0;
			loff_t pos = 0;
			int data_off;

			folio_lock(f);

			/* Only the first page is searched from mid-page. */
			if (f->index == first_index)
				skip = start_offset & (PAGE_SIZE - 1);

			data_off = folio_data_offset(f, skip);
			if (data_off >= 0)
				pos = clamp(((loff_t) f->index << PAGE_SHIFT) +
					    data_off,
					    start_offset, end_offset);

			folio_unlock(f);

			if (data_off < 0)
				continue;

			folio_batch_release(&batch);
			return pos;
		}

		folio_batch_release(&batch);
		cond_resched();
	}

	return end_offset;
}
/*
 * bch2_seek_data - SEEK_DATA implementation
 * @file:	file to seek in
 * @offset:	byte offset to start searching from
 *
 * Looks for the first data extent at or after @offset in the extents btree,
 * then checks the page cache between @offset and that point via
 * bch2_seek_pagecache_data().
 *
 * Returns the result of vfs_setpos() on the position found, -ENXIO if @offset
 * or the position found is at or beyond i_size, or another negative error
 * from the btree lookup.
 */
static loff_t bch2_seek_data ( struct file * file , u64 offset )
{
struct bch_inode_info * inode = file_bch_inode ( file ) ;
struct bch_fs * c = inode - > v . i_sb - > s_fs_info ;
2019-03-25 22:10:15 +03:00
struct btree_trans trans ;
2021-08-30 22:18:31 +03:00
struct btree_iter iter ;
2017-03-17 09:18:50 +03:00
struct bkey_s_c k ;
2021-03-16 07:28:17 +03:00
subvol_inum inum = inode_inum ( inode ) ;
2017-03-17 09:18:50 +03:00
/* next_data stays at MAX_LFS_FILESIZE when the btree walk finds no data extent. */
u64 isize , next_data = MAX_LFS_FILESIZE ;
2021-03-16 07:28:17 +03:00
u32 snapshot ;
2017-03-17 09:18:50 +03:00
int ret ;
isize = i_size_read ( & inode - > v ) ;
if ( offset > = isize )
return - ENXIO ;
2019-05-15 17:54:43 +03:00
bch2_trans_init ( & trans , c , 0 , 0 ) ;
2021-03-16 07:28:17 +03:00
/* Re-entered from the err label below whenever ret is -EINTR. */
retry :
bch2_trans_begin ( & trans ) ;
ret = bch2_subvolume_get_snapshot ( & trans , inum . subvol , & snapshot ) ;
if ( ret )
goto err ;
2019-03-25 22:10:15 +03:00
2021-10-21 19:05:21 +03:00
/*
 * Stop at the first key of another inode or at the first data extent.
 * NOTE(review): the last branch compares k.k->p.offset >> 9 against isize,
 * which is a byte count from i_size_read(); everywhere else in this
 * function key offsets are shifted left by 9 before being compared with
 * byte values. Confirm the shift direction is intended.
 */
for_each_btree_key_norestart ( & trans , iter , BTREE_ID_extents ,
2021-03-16 07:28:17 +03:00
SPOS ( inode - > v . i_ino , offset > > 9 , snapshot ) , 0 , k , ret ) {
2017-03-17 09:18:50 +03:00
if ( k . k - > p . inode ! = inode - > v . i_ino ) {
break ;
} else if ( bkey_extent_is_data ( k . k ) ) {
next_data = max ( offset , bkey_start_offset ( k . k ) < < 9 ) ;
break ;
} else if ( k . k - > p . offset > > 9 > isize )
break ;
}
2021-08-30 22:18:31 +03:00
bch2_trans_iter_exit ( & trans , & iter ) ;
2021-03-16 07:28:17 +03:00
/* A failed snapshot lookup jumps here directly, skipping the iterator exit above. */
err :
if ( ret = = - EINTR )
goto retry ;
2017-03-17 09:18:50 +03:00
2021-10-19 22:08:00 +03:00
bch2_trans_exit ( & trans ) ;
2017-03-17 09:18:50 +03:00
if ( ret )
return ret ;
/* The page cache is searched only over [offset, next_data). */
if ( next_data > offset )
2019-07-30 20:49:17 +03:00
next_data = bch2_seek_pagecache_data ( & inode - > v ,
2017-03-17 09:18:50 +03:00
offset , next_data ) ;
2019-07-30 19:46:53 +03:00
if ( next_data > = isize )
2017-03-17 09:18:50 +03:00
return - ENXIO ;
return vfs_setpos ( file , next_data , MAX_LFS_FILESIZE ) ;
}
2019-07-30 20:49:17 +03:00
static int __page_hole_offset ( struct page * page , unsigned offset )
2017-03-17 09:18:50 +03:00
{
2019-07-30 20:49:17 +03:00
struct bch_page_state * s = bch2_page_state ( page ) ;
unsigned i ;
if ( ! s )
return 0 ;
for ( i = offset > > 9 ; i < PAGE_SECTORS ; i + + )
if ( s - > s [ i ] . state < SECTOR_DIRTY )
return i < < 9 ;
return - 1 ;
}
/*
 * Look for a hole in the page cache page that contains byte @offset.
 *
 * Returns:
 *   @offset itself when no page is cached at that index,
 *   the file offset of the first hole sector reported by
 *   __page_hole_offset() for that page (this can be below @offset when the
 *   page has no state attached; callers clamp the result),
 *   -1 when the page has no hole at or after @offset.
 */
static loff_t page_hole_offset(struct address_space *mapping, loff_t offset)
{
	pgoff_t index = offset >> PAGE_SHIFT;
	struct page *page;
	int pg_offset;
	loff_t ret = -1;

	page = find_lock_page(mapping, index);
	if (!page)
		return offset;

	pg_offset = __page_hole_offset(page, offset & (PAGE_SIZE - 1));
	if (pg_offset >= 0)
		ret = ((loff_t) index << PAGE_SHIFT) + pg_offset;

	unlock_page(page);
	/*
	 * find_lock_page() hands the page back locked and with an extra
	 * reference; drop that reference as well as the lock, otherwise every
	 * call leaks a page reference.
	 */
	put_page(page);

	return ret;
}
2019-07-30 20:49:17 +03:00
/*
 * Search the page cache of @vinode page by page, from @start_offset up to
 * @end_offset, for the first hole reported by page_hole_offset().
 *
 * Returns the hole position (never below @start_offset), or @end_offset if no
 * hole is found in the range.
 */
static loff_t bch2_seek_pagecache_hole(struct inode *vinode,
				       loff_t start_offset,
				       loff_t end_offset)
{
	struct address_space *mapping = vinode->i_mapping;
	loff_t pos;

	/* After the first, possibly mid-page, probe advance to page boundaries. */
	for (pos = start_offset;
	     pos < end_offset;
	     pos = (pos + PAGE_SIZE) & PAGE_MASK) {
		loff_t found = page_hole_offset(mapping, pos);

		if (found < 0 || found > end_offset)
			continue;

		return max(start_offset, found);
	}

	return end_offset;
}
/*
 * bch2_seek_hole - SEEK_HOLE implementation
 * @file:	file to seek in
 * @offset:	byte offset to start searching from
 *
 * Walks the extents btree slots starting at @offset. For every slot that is
 * not a data extent (or that belongs to another inode) the page cache is
 * consulted through bch2_seek_pagecache_hole(); the search ends at the first
 * position that call reports as a hole.
 *
 * Returns the result of vfs_setpos() on the hole position, capped at i_size,
 * -ENXIO if @offset is at or beyond i_size, or another negative error from
 * the btree lookup.
 */
static loff_t bch2_seek_hole ( struct file * file , u64 offset )
{
struct bch_inode_info * inode = file_bch_inode ( file ) ;
struct bch_fs * c = inode - > v . i_sb - > s_fs_info ;
2019-03-25 22:10:15 +03:00
struct btree_trans trans ;
2021-08-30 22:18:31 +03:00
struct btree_iter iter ;
2017-03-17 09:18:50 +03:00
struct bkey_s_c k ;
2021-03-16 07:28:17 +03:00
subvol_inum inum = inode_inum ( inode ) ;
2017-03-17 09:18:50 +03:00
u64 isize , next_hole = MAX_LFS_FILESIZE ;
2021-03-16 07:28:17 +03:00
u32 snapshot ;
2017-03-17 09:18:50 +03:00
int ret ;
isize = i_size_read ( & inode - > v ) ;
if ( offset > = isize )
return - ENXIO ;
2019-05-15 17:54:43 +03:00
bch2_trans_init ( & trans , c , 0 , 0 ) ;
2021-03-16 07:28:17 +03:00
/* Re-entered from the err label below whenever ret is -EINTR. */
retry :
bch2_trans_begin ( & trans ) ;
ret = bch2_subvolume_get_snapshot ( & trans , inum . subvol , & snapshot ) ;
if ( ret )
goto err ;
2019-03-25 22:10:15 +03:00
2021-10-21 19:05:21 +03:00
/*
 * Three cases per slot:
 *  - key of another inode: search the page cache from offset with no upper
 *    bound other than MAX_LFS_FILESIZE, then stop;
 *  - non-data key: search the page cache over the part of this key at or
 *    after offset; stop only if a hole is found before the key's end;
 *  - data extent: raise offset to at least the start of this extent and
 *    keep walking.
 */
for_each_btree_key_norestart ( & trans , iter , BTREE_ID_extents ,
2021-03-16 07:28:17 +03:00
SPOS ( inode - > v . i_ino , offset > > 9 , snapshot ) ,
2019-04-17 22:49:28 +03:00
BTREE_ITER_SLOTS , k , ret ) {
2017-03-17 09:18:50 +03:00
if ( k . k - > p . inode ! = inode - > v . i_ino ) {
2019-07-30 20:49:17 +03:00
next_hole = bch2_seek_pagecache_hole ( & inode - > v ,
2017-03-17 09:18:50 +03:00
offset , MAX_LFS_FILESIZE ) ;
break ;
} else if ( ! bkey_extent_is_data ( k . k ) ) {
2019-07-30 20:49:17 +03:00
next_hole = bch2_seek_pagecache_hole ( & inode - > v ,
2017-03-17 09:18:50 +03:00
max ( offset , bkey_start_offset ( k . k ) < < 9 ) ,
k . k - > p . offset < < 9 ) ;
if ( next_hole < k . k - > p . offset < < 9 )
break ;
} else {
offset = max ( offset , bkey_start_offset ( k . k ) < < 9 ) ;
}
}
2021-08-30 22:18:31 +03:00
bch2_trans_iter_exit ( & trans , & iter ) ;
2021-03-16 07:28:17 +03:00
/* A failed snapshot lookup jumps here directly, skipping the iterator exit above. */
err :
if ( ret = = - EINTR )
goto retry ;
2017-03-17 09:18:50 +03:00
2021-10-19 22:08:00 +03:00
bch2_trans_exit ( & trans ) ;
2017-03-17 09:18:50 +03:00
if ( ret )
return ret ;
/* A hole position past i_size is reported as i_size itself. */
if ( next_hole > isize )
next_hole = isize ;
return vfs_setpos ( file , next_hole , MAX_LFS_FILESIZE ) ;
}
/*
 * bch2_llseek - llseek entry point
 * @file:	file to seek in
 * @offset:	offset argument, interpreted according to @whence
 * @whence:	one of SEEK_SET, SEEK_CUR, SEEK_END, SEEK_DATA, SEEK_HOLE
 *
 * SEEK_DATA and SEEK_HOLE are handled by bch2_seek_data() and
 * bch2_seek_hole(); the three classic modes go to generic_file_llseek().
 * Any other @whence yields -EINVAL.
 */
loff_t bch2_llseek(struct file *file, loff_t offset, int whence)
{
	if (whence == SEEK_DATA)
		return bch2_seek_data(file, offset);

	if (whence == SEEK_HOLE)
		return bch2_seek_hole(file, offset);

	if (whence == SEEK_SET ||
	    whence == SEEK_CUR ||
	    whence == SEEK_END)
		return generic_file_llseek(file, offset, whence);

	return -EINVAL;
}
void bch2_fs_fsio_exit ( struct bch_fs * c )
{
bioset_exit ( & c - > dio_write_bioset ) ;
bioset_exit ( & c - > dio_read_bioset ) ;
bioset_exit ( & c - > writepage_bioset ) ;
}
int bch2_fs_fsio_init ( struct bch_fs * c )
{
int ret = 0 ;
pr_verbose_init ( c - > opts , " " ) ;
if ( bioset_init ( & c - > writepage_bioset ,
2019-10-09 19:50:39 +03:00
4 , offsetof ( struct bch_writepage_io , op . wbio . bio ) ,
2017-03-17 09:18:50 +03:00
BIOSET_NEED_BVECS ) | |
bioset_init ( & c - > dio_read_bioset ,
4 , offsetof ( struct dio_read , rbio . bio ) ,
BIOSET_NEED_BVECS ) | |
bioset_init ( & c - > dio_write_bioset ,
2019-10-09 19:50:39 +03:00
4 , offsetof ( struct dio_write , op . wbio . bio ) ,
2017-03-17 09:18:50 +03:00
BIOSET_NEED_BVECS ) )
ret = - ENOMEM ;
pr_verbose_init ( c - > opts , " ret %i " , ret ) ;
return ret ;
}
# endif /* NO_BCACHEFS_FS */