2005-12-15 14:31:24 -08:00
/* -*- mode: c; c-basic-offset: 8; -*-
* vim : noexpandtab sw = 8 ts = 8 sts = 0 :
*
* io . c
*
* Buffer cache handling
*
* Copyright ( C ) 2002 , 2004 Oracle . All rights reserved .
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation ; either
* version 2 of the License , or ( at your option ) any later version .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the GNU
* General Public License for more details .
*
* You should have received a copy of the GNU General Public
* License along with this program ; if not , write to the
* Free Software Foundation , Inc . , 59 Temple Place - Suite 330 ,
* Boston, MA 02111-1307, USA.
*/
# include <linux/fs.h>
# include <linux/types.h>
# include <linux/highmem.h>
2016-11-01 07:40:13 -06:00
# include <linux/bio.h>
2005-12-15 14:31:24 -08:00
# include <cluster/masklog.h>
# include "ocfs2.h"
# include "alloc.h"
# include "inode.h"
# include "journal.h"
# include "uptodate.h"
# include "buffer_head_io.h"
2011-02-24 16:09:38 +08:00
# include "ocfs2_trace.h"
2005-12-15 14:31:24 -08:00
2008-11-13 14:49:19 -08:00
/*
* Bits on bh - > b_state used by ocfs2 .
*
2008-11-18 17:16:47 -08:00
* These MUST be after the JBD2 bits . Hence , we use BH_JBDPrivateStart .
2008-11-13 14:49:19 -08:00
*/
enum ocfs2_state_bits {
2008-11-18 17:16:47 -08:00
BH_NeedsValidate = BH_JBDPrivateStart ,
2008-11-13 14:49:19 -08:00
} ;
/* Expand the magic b_state functions */
BUFFER_FNS ( NeedsValidate , needs_validate ) ;
2005-12-15 14:31:24 -08:00
int ocfs2_write_block ( struct ocfs2_super * osb , struct buffer_head * bh ,
2009-02-10 20:00:41 -08:00
struct ocfs2_caching_info * ci )
2005-12-15 14:31:24 -08:00
{
int ret = 0 ;
2011-02-24 16:09:38 +08:00
trace_ocfs2_write_block ( ( unsigned long long ) bh - > b_blocknr , ci ) ;
2005-12-15 14:31:24 -08:00
BUG_ON ( bh - > b_blocknr < OCFS2_SUPER_BLOCK_BLKNO ) ;
BUG_ON ( buffer_jbd ( bh ) ) ;
/* No need to check for a soft readonly file system here. non
* journalled writes are only ever done on system files which
* can get modified during recovery even if read - only . */
if ( ocfs2_is_hard_readonly ( osb ) ) {
ret = - EROFS ;
2011-03-07 16:43:21 +08:00
mlog_errno ( ret ) ;
2005-12-15 14:31:24 -08:00
goto out ;
}
2009-02-10 20:00:41 -08:00
ocfs2_metadata_cache_io_lock ( ci ) ;
2005-12-15 14:31:24 -08:00
lock_buffer ( bh ) ;
set_buffer_uptodate ( bh ) ;
/* remove from dirty list before I/O. */
clear_buffer_dirty ( bh ) ;
2008-10-09 17:20:29 -07:00
get_bh ( bh ) ; /* for end_buffer_write_sync() */
2005-12-15 14:31:24 -08:00
bh - > b_end_io = end_buffer_write_sync ;
2016-06-05 14:31:43 -05:00
submit_bh ( REQ_OP_WRITE , 0 , bh ) ;
2005-12-15 14:31:24 -08:00
wait_on_buffer ( bh ) ;
if ( buffer_uptodate ( bh ) ) {
2009-02-10 20:00:41 -08:00
ocfs2_set_buffer_uptodate ( ci , bh ) ;
2005-12-15 14:31:24 -08:00
} else {
/* We don't need to remove the clustered uptodate
* information for this bh as it ' s not marked locally
* uptodate . */
ret = - EIO ;
2011-03-07 16:43:21 +08:00
mlog_errno ( ret ) ;
2005-12-15 14:31:24 -08:00
}
2009-02-10 20:00:41 -08:00
ocfs2_metadata_cache_io_unlock ( ci ) ;
2005-12-15 14:31:24 -08:00
out :
return ret ;
}
2018-11-02 15:48:19 -07:00
/* Caller must provide a bhs[] with all NULL or non-NULL entries, so it
* will be easier to handle read failure .
*/
2008-10-09 17:20:29 -07:00
int ocfs2_read_blocks_sync ( struct ocfs2_super * osb , u64 block ,
unsigned int nr , struct buffer_head * bhs [ ] )
{
int status = 0 ;
unsigned int i ;
struct buffer_head * bh ;
2018-11-02 15:48:19 -07:00
int new_bh = 0 ;
2008-10-09 17:20:29 -07:00
2011-02-24 16:09:38 +08:00
trace_ocfs2_read_blocks_sync ( ( unsigned long long ) block , nr ) ;
if ( ! nr )
2008-10-09 17:20:29 -07:00
goto bail ;
2018-11-02 15:48:19 -07:00
/* Don't put buffer head and re-assign it to NULL if it is allocated
* outside since the caller can ' t be aware of this alternation !
*/
new_bh = ( bhs [ 0 ] = = NULL ) ;
2008-10-09 17:20:29 -07:00
for ( i = 0 ; i < nr ; i + + ) {
if ( bhs [ i ] = = NULL ) {
bhs [ i ] = sb_getblk ( osb - > sb , block + + ) ;
if ( bhs [ i ] = = NULL ) {
2013-11-12 15:06:54 -08:00
status = - ENOMEM ;
2008-10-09 17:20:29 -07:00
mlog_errno ( status ) ;
2018-11-02 15:48:19 -07:00
break ;
2008-10-09 17:20:29 -07:00
}
}
bh = bhs [ i ] ;
if ( buffer_jbd ( bh ) ) {
2011-02-24 16:09:38 +08:00
trace_ocfs2_read_blocks_sync_jbd (
( unsigned long long ) bh - > b_blocknr ) ;
2008-10-09 17:20:29 -07:00
continue ;
}
if ( buffer_dirty ( bh ) ) {
/* This should probably be a BUG, or
* at least return an error . */
mlog ( ML_ERROR ,
" trying to sync read a dirty "
" buffer! (blocknr = %llu), skipping \n " ,
( unsigned long long ) bh - > b_blocknr ) ;
continue ;
}
lock_buffer ( bh ) ;
if ( buffer_jbd ( bh ) ) {
2016-06-24 14:50:13 -07:00
# ifdef CATCH_BH_JBD_RACES
2008-10-09 17:20:29 -07:00
mlog ( ML_ERROR ,
" block %llu had the JBD bit set "
" while I was in lock_buffer! " ,
( unsigned long long ) bh - > b_blocknr ) ;
BUG ( ) ;
2016-06-24 14:50:13 -07:00
# else
unlock_buffer ( bh ) ;
continue ;
# endif
2008-10-09 17:20:29 -07:00
}
get_bh ( bh ) ; /* for end_buffer_read_sync() */
bh - > b_end_io = end_buffer_read_sync ;
2016-06-05 14:31:43 -05:00
submit_bh ( REQ_OP_READ , 0 , bh ) ;
2008-10-09 17:20:29 -07:00
}
2018-11-02 15:48:19 -07:00
read_failure :
2008-10-09 17:20:29 -07:00
for ( i = nr ; i > 0 ; i - - ) {
bh = bhs [ i - 1 ] ;
2018-11-02 15:48:19 -07:00
if ( unlikely ( status ) ) {
if ( new_bh & & bh ) {
/* If middle bh fails, let previous bh
* finish its read and then put it to
* aovoid bh leak
*/
if ( ! buffer_jbd ( bh ) )
wait_on_buffer ( bh ) ;
put_bh ( bh ) ;
bhs [ i - 1 ] = NULL ;
} else if ( bh & & buffer_uptodate ( bh ) ) {
clear_buffer_uptodate ( bh ) ;
}
continue ;
}
2008-11-21 14:06:55 -08:00
/* No need to wait on the buffer if it's managed by JBD. */
if ( ! buffer_jbd ( bh ) )
wait_on_buffer ( bh ) ;
2008-10-09 17:20:29 -07:00
if ( ! buffer_uptodate ( bh ) ) {
/* Status won't be cleared from here on out,
* so we can safely record this and loop back
* to cleanup the other buffers . */
status = - EIO ;
2018-11-02 15:48:19 -07:00
goto read_failure ;
2008-10-09 17:20:29 -07:00
}
}
bail :
return status ;
}
2018-11-02 15:48:19 -07:00
/* Caller must provide a bhs[] with all NULL or non-NULL entries, so it
* will be easier to handle read failure .
*/
2009-02-10 20:00:41 -08:00
int ocfs2_read_blocks ( struct ocfs2_caching_info * ci , u64 block , int nr ,
2008-11-13 14:49:19 -08:00
struct buffer_head * bhs [ ] , int flags ,
int ( * validate ) ( struct super_block * sb ,
struct buffer_head * bh ) )
2005-12-15 14:31:24 -08:00
{
int status = 0 ;
int i , ignore_cache = 0 ;
struct buffer_head * bh ;
2009-02-10 20:00:41 -08:00
struct super_block * sb = ocfs2_metadata_cache_get_super ( ci ) ;
2018-11-02 15:48:19 -07:00
int new_bh = 0 ;
2005-12-15 14:31:24 -08:00
2011-02-24 16:09:38 +08:00
trace_ocfs2_read_blocks_begin ( ci , ( unsigned long long ) block , nr , flags ) ;
2005-12-15 14:31:24 -08:00
2009-02-10 20:00:41 -08:00
BUG_ON ( ! ci ) ;
2008-10-09 17:20:34 -07:00
BUG_ON ( ( flags & OCFS2_BH_READAHEAD ) & &
( flags & OCFS2_BH_IGNORE_CACHE ) ) ;
2006-04-21 13:49:02 -07:00
2008-10-09 17:20:30 -07:00
if ( bhs = = NULL ) {
2005-12-15 14:31:24 -08:00
status = - EINVAL ;
mlog_errno ( status ) ;
goto bail ;
}
if ( nr < 0 ) {
mlog ( ML_ERROR , " asked to read %d blocks! \n " , nr ) ;
status = - EINVAL ;
mlog_errno ( status ) ;
goto bail ;
}
if ( nr = = 0 ) {
status = 0 ;
goto bail ;
}
2018-11-02 15:48:19 -07:00
/* Don't put buffer head and re-assign it to NULL if it is allocated
* outside since the caller can ' t be aware of this alternation !
*/
new_bh = ( bhs [ 0 ] = = NULL ) ;
2009-02-10 20:00:41 -08:00
ocfs2_metadata_cache_io_lock ( ci ) ;
2005-12-15 14:31:24 -08:00
for ( i = 0 ; i < nr ; i + + ) {
if ( bhs [ i ] = = NULL ) {
2009-02-10 20:00:41 -08:00
bhs [ i ] = sb_getblk ( sb , block + + ) ;
2005-12-15 14:31:24 -08:00
if ( bhs [ i ] = = NULL ) {
2009-02-10 20:00:41 -08:00
ocfs2_metadata_cache_io_unlock ( ci ) ;
2013-11-12 15:06:54 -08:00
status = - ENOMEM ;
2005-12-15 14:31:24 -08:00
mlog_errno ( status ) ;
2018-11-02 15:48:19 -07:00
/* Don't forget to put previous bh! */
break ;
2005-12-15 14:31:24 -08:00
}
}
bh = bhs [ i ] ;
2008-10-09 17:20:34 -07:00
ignore_cache = ( flags & OCFS2_BH_IGNORE_CACHE ) ;
2005-12-15 14:31:24 -08:00
2006-04-21 13:49:02 -07:00
/* There are three read-ahead cases here which we need to
* be concerned with . All three assume a buffer has
* previously been submitted with OCFS2_BH_READAHEAD
* and it hasn ' t yet completed I / O .
*
* 1 ) The current request is sync to disk . This rarely
* happens these days , and never when performance
* matters - the code can just wait on the buffer
* lock and re - submit .
*
* 2 ) The current request is cached , but not
* readahead . ocfs2_buffer_uptodate ( ) will return
* false anyway , so we ' ll wind up waiting on the
* buffer lock to do I / O . We re - check the request
* with after getting the lock to avoid a re - submit .
*
* 3 ) The current request is readahead ( and so must
* also be a caching one ) . We short circuit if the
* buffer is locked ( under I / O ) and if it ' s in the
* uptodate cache . The re - check from # 2 catches the
* case that the previous read - ahead completes just
* before our is - it - in - flight check .
*/
2009-02-10 20:00:41 -08:00
if ( ! ignore_cache & & ! ocfs2_buffer_uptodate ( ci , bh ) ) {
2011-02-24 16:22:20 +08:00
trace_ocfs2_read_blocks_from_disk (
2005-12-15 14:31:24 -08:00
( unsigned long long ) bh - > b_blocknr ,
2009-02-10 20:00:41 -08:00
( unsigned long long ) ocfs2_metadata_cache_owner ( ci ) ) ;
2008-10-09 17:20:34 -07:00
/* We're using ignore_cache here to say
* " go to disk " */
2005-12-15 14:31:24 -08:00
ignore_cache = 1 ;
}
2011-02-24 16:09:38 +08:00
trace_ocfs2_read_blocks_bh ( ( unsigned long long ) bh - > b_blocknr ,
ignore_cache , buffer_jbd ( bh ) , buffer_dirty ( bh ) ) ;
2005-12-15 14:31:24 -08:00
if ( buffer_jbd ( bh ) ) {
continue ;
}
2008-10-09 17:20:34 -07:00
if ( ignore_cache ) {
2005-12-15 14:31:24 -08:00
if ( buffer_dirty ( bh ) ) {
/* This should probably be a BUG, or
* at least return an error . */
continue ;
}
2006-04-21 13:49:02 -07:00
/* A read-ahead request was made - if the
* buffer is already under read - ahead from a
* previously submitted request than we are
* done here . */
if ( ( flags & OCFS2_BH_READAHEAD )
2009-02-10 20:00:41 -08:00
& & ocfs2_buffer_read_ahead ( ci , bh ) )
2006-04-21 13:49:02 -07:00
continue ;
2005-12-15 14:31:24 -08:00
lock_buffer ( bh ) ;
if ( buffer_jbd ( bh ) ) {
# ifdef CATCH_BH_JBD_RACES
mlog ( ML_ERROR , " block %llu had the JBD bit set "
" while I was in lock_buffer! " ,
( unsigned long long ) bh - > b_blocknr ) ;
BUG ( ) ;
# else
unlock_buffer ( bh ) ;
continue ;
# endif
}
2006-04-21 13:49:02 -07:00
/* Re-check ocfs2_buffer_uptodate() as a
* previously read - ahead buffer may have
* completed I / O while we were waiting for the
* buffer lock . */
2008-10-09 17:20:34 -07:00
if ( ! ( flags & OCFS2_BH_IGNORE_CACHE )
2006-04-21 13:49:02 -07:00
& & ! ( flags & OCFS2_BH_READAHEAD )
2009-02-10 20:00:41 -08:00
& & ocfs2_buffer_uptodate ( ci , bh ) ) {
2006-04-21 13:49:02 -07:00
unlock_buffer ( bh ) ;
continue ;
}
2005-12-15 14:31:24 -08:00
get_bh ( bh ) ; /* for end_buffer_read_sync() */
2008-11-13 14:49:19 -08:00
if ( validate )
set_buffer_needs_validate ( bh ) ;
2005-12-15 14:31:24 -08:00
bh - > b_end_io = end_buffer_read_sync ;
2016-06-05 14:31:43 -05:00
submit_bh ( REQ_OP_READ , 0 , bh ) ;
2005-12-15 14:31:24 -08:00
continue ;
}
}
2018-11-02 15:48:19 -07:00
read_failure :
2005-12-15 14:31:24 -08:00
for ( i = ( nr - 1 ) ; i > = 0 ; i - - ) {
bh = bhs [ i ] ;
2006-04-21 13:49:02 -07:00
if ( ! ( flags & OCFS2_BH_READAHEAD ) ) {
2018-11-02 15:48:19 -07:00
if ( unlikely ( status ) ) {
/* Clear the buffers on error including those
* ever succeeded in reading
*/
if ( new_bh & & bh ) {
/* If middle bh fails, let previous bh
* finish its read and then put it to
* aovoid bh leak
*/
if ( ! buffer_jbd ( bh ) )
wait_on_buffer ( bh ) ;
put_bh ( bh ) ;
bhs [ i ] = NULL ;
} else if ( bh & & buffer_uptodate ( bh ) ) {
clear_buffer_uptodate ( bh ) ;
}
2015-09-04 15:44:20 -07:00
continue ;
}
2006-04-21 13:49:02 -07:00
/* We know this can't have changed as we hold the
2009-02-10 20:00:41 -08:00
* owner sem . Avoid doing any work on the bh if the
2006-04-21 13:49:02 -07:00
* journal has it . */
if ( ! buffer_jbd ( bh ) )
wait_on_buffer ( bh ) ;
if ( ! buffer_uptodate ( bh ) ) {
/* Status won't be cleared from here on out,
* so we can safely record this and loop back
* to cleanup the other buffers . Don ' t need to
* remove the clustered uptodate information
* for this bh as it ' s not marked locally
* uptodate . */
status = - EIO ;
2018-09-20 12:22:51 -07:00
clear_buffer_needs_validate ( bh ) ;
2018-11-02 15:48:19 -07:00
goto read_failure ;
2006-04-21 13:49:02 -07:00
}
2008-11-13 14:49:19 -08:00
if ( buffer_needs_validate ( bh ) ) {
/* We never set NeedsValidate if the
* buffer was held by the journal , so
* that better not have changed */
BUG_ON ( buffer_jbd ( bh ) ) ;
clear_buffer_needs_validate ( bh ) ;
2009-02-10 20:00:41 -08:00
status = validate ( sb , bh ) ;
2018-11-02 15:48:19 -07:00
if ( status )
goto read_failure ;
2008-11-13 14:49:19 -08:00
}
2005-12-15 14:31:24 -08:00
}
2006-04-21 13:49:02 -07:00
/* Always set the buffer in the cache, even if it was
* a forced read , or read - ahead which hasn ' t yet
* completed . */
2009-02-10 20:00:41 -08:00
ocfs2_set_buffer_uptodate ( ci , bh ) ;
2005-12-15 14:31:24 -08:00
}
2009-02-10 20:00:41 -08:00
ocfs2_metadata_cache_io_unlock ( ci ) ;
2005-12-15 14:31:24 -08:00
2011-02-24 16:09:38 +08:00
trace_ocfs2_read_blocks_end ( ( unsigned long long ) block , nr ,
flags , ignore_cache ) ;
2005-12-15 14:31:24 -08:00
bail :
return status ;
}
2007-12-18 15:47:03 +08:00
/*
 * BUG() unless @blkno is the super block (OCFS2_SUPER_BLOCK_BLKNO) or
 * one of the OCFS2_MAX_BACKUP_SUPERBLOCKS backup super block locations
 * reported by ocfs2_backup_super_blkno() for @sb.
 */
static void ocfs2_check_super_or_backup(struct super_block *sb,
					sector_t blkno)
{
	int slot = 0;

	if (blkno == OCFS2_SUPER_BLOCK_BLKNO)
		return;

	while (slot < OCFS2_MAX_BACKUP_SUPERBLOCKS) {
		u64 candidate = ocfs2_backup_super_blkno(sb, slot);

		if (candidate == blkno)
			return;
		slot++;
	}

	/* Neither the super block nor any of its backups. */
	BUG();
}
/*
* Write super block and backups doesn ' t need to collaborate with journal ,
2009-02-10 20:00:41 -08:00
* so we don ' t need to lock ip_io_mutex and ci doesn ' t need to bea passed
2007-12-18 15:47:03 +08:00
* into this function .
*/
int ocfs2_write_super_or_backup ( struct ocfs2_super * osb ,
struct buffer_head * bh )
{
int ret = 0 ;
2010-03-31 18:25:44 -07:00
struct ocfs2_dinode * di = ( struct ocfs2_dinode * ) bh - > b_data ;
2007-12-18 15:47:03 +08:00
BUG_ON ( buffer_jbd ( bh ) ) ;
ocfs2_check_super_or_backup ( osb - > sb , bh - > b_blocknr ) ;
if ( ocfs2_is_hard_readonly ( osb ) | | ocfs2_is_soft_readonly ( osb ) ) {
ret = - EROFS ;
2011-03-07 16:43:21 +08:00
mlog_errno ( ret ) ;
2007-12-18 15:47:03 +08:00
goto out ;
}
lock_buffer ( bh ) ;
set_buffer_uptodate ( bh ) ;
/* remove from dirty list before I/O. */
clear_buffer_dirty ( bh ) ;
get_bh ( bh ) ; /* for end_buffer_write_sync() */
bh - > b_end_io = end_buffer_write_sync ;
2010-03-31 18:25:44 -07:00
ocfs2_compute_meta_ecc ( osb - > sb , bh - > b_data , & di - > i_check ) ;
2016-06-05 14:31:43 -05:00
submit_bh ( REQ_OP_WRITE , 0 , bh ) ;
2007-12-18 15:47:03 +08:00
wait_on_buffer ( bh ) ;
if ( ! buffer_uptodate ( bh ) ) {
ret = - EIO ;
2011-03-07 16:43:21 +08:00
mlog_errno ( ret ) ;
2007-12-18 15:47:03 +08:00
}
out :
return ret ;
}