2021-09-07 16:13:02 +02:00
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 1991, 1992  Linus Torvalds
 * Copyright (C) 2001  Andrea Arcangeli <andrea@suse.de> SuSE
 * Copyright (C) 2016 - 2020 Christoph Hellwig
 */
# include <linux/init.h>
# include <linux/mm.h>
# include <linux/blkdev.h>
# include <linux/buffer_head.h>
# include <linux/mpage.h>
# include <linux/uio.h>
# include <linux/namei.h>
# include <linux/task_io_accounting_ops.h>
# include <linux/falloc.h>
# include <linux/suspend.h>
2021-09-23 10:37:51 +08:00
# include <linux/fs.h>
2023-08-01 19:22:00 +02:00
# include <linux/iomap.h>
2021-12-02 12:34:00 -08:00
# include <linux/module.h>
2021-09-07 16:13:02 +02:00
# include "blk.h"
2021-10-13 09:57:11 +01:00
static inline struct inode * bdev_file_inode ( struct file * file )
2021-09-07 16:13:02 +02:00
{
return file - > f_mapping - > host ;
}
2022-07-14 11:06:32 -07:00
static blk_opf_t dio_bio_write_op ( struct kiocb * iocb )
2021-09-07 16:13:02 +02:00
{
2022-07-14 11:06:32 -07:00
blk_opf_t opf = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE ;
2021-09-07 16:13:02 +02:00
/* avoid the need for a I/O completion work item */
2022-05-22 09:39:27 -04:00
if ( iocb_is_dsync ( iocb ) )
2022-07-14 11:06:32 -07:00
opf | = REQ_FUA ;
return opf ;
2021-09-07 16:13:02 +02:00
}
2022-06-10 12:58:24 -07:00
/*
 * Return true when a direct I/O at @pos using @iter must be rejected: either
 * @pos is not a multiple of the device's logical block size, or
 * bdev_iter_is_aligned() reports the iterator as not aligned.
 */
static bool blkdev_dio_unaligned(struct block_device *bdev, loff_t pos,
		struct iov_iter *iter)
{
	if (pos & (bdev_logical_block_size(bdev) - 1))
		return true;

	return !bdev_iter_is_aligned(bdev, iter);
}
2021-09-07 16:13:02 +02:00
# define DIO_INLINE_BIO_VECS 4
static ssize_t __blkdev_direct_IO_simple ( struct kiocb * iocb ,
2024-04-15 12:20:20 +00:00
struct iov_iter * iter , struct block_device * bdev ,
unsigned int nr_pages )
2021-09-07 16:13:02 +02:00
{
struct bio_vec inline_vecs [ DIO_INLINE_BIO_VECS ] , * vecs ;
loff_t pos = iocb - > ki_pos ;
bool should_dirty = false ;
struct bio bio ;
ssize_t ret ;
if ( nr_pages < = DIO_INLINE_BIO_VECS )
vecs = inline_vecs ;
else {
vecs = kmalloc_array ( nr_pages , sizeof ( struct bio_vec ) ,
GFP_KERNEL ) ;
if ( ! vecs )
return - ENOMEM ;
}
2022-01-24 10:11:06 +01:00
if ( iov_iter_rw ( iter ) = = READ ) {
bio_init ( & bio , bdev , vecs , nr_pages , REQ_OP_READ ) ;
2022-05-22 14:59:25 -04:00
if ( user_backed_iter ( iter ) )
2022-01-24 10:11:06 +01:00
should_dirty = true ;
} else {
bio_init ( & bio , bdev , vecs , nr_pages , dio_bio_write_op ( iocb ) ) ;
}
2021-10-20 20:00:50 +01:00
bio . bi_iter . bi_sector = pos > > SECTOR_SHIFT ;
2024-02-02 12:39:25 -08:00
bio . bi_write_hint = file_inode ( iocb - > ki_filp ) - > i_write_hint ;
2021-09-07 16:13:02 +02:00
bio . bi_ioprio = iocb - > ki_ioprio ;
ret = bio_iov_iter_get_pages ( & bio , iter ) ;
if ( unlikely ( ret ) )
goto out ;
ret = bio . bi_iter . bi_size ;
2022-01-24 10:11:06 +01:00
if ( iov_iter_rw ( iter ) = = WRITE )
2021-09-07 16:13:02 +02:00
task_io_account_write ( ret ) ;
2022-01-24 10:11:06 +01:00
2021-09-07 16:13:02 +02:00
if ( iocb - > ki_flags & IOCB_NOWAIT )
bio . bi_opf | = REQ_NOWAIT ;
2022-04-20 22:31:10 +08:00
submit_bio_wait ( & bio ) ;
2021-09-07 16:13:02 +02:00
bio_release_pages ( & bio , should_dirty ) ;
if ( unlikely ( bio . bi_status ) )
ret = blk_status_to_errno ( bio . bi_status ) ;
out :
if ( vecs ! = inline_vecs )
kfree ( vecs ) ;
bio_uninit ( & bio ) ;
return ret ;
}
2021-10-14 11:17:43 -06:00
/* Bit flags stored in blkdev_dio.flags. */
enum {
	DIO_SHOULD_DIRTY	= 1 << 0,	/* completion runs bio_check_pages_dirty() */
	DIO_IS_SYNC		= 1 << 1,	/* completion wakes dio->waiter */
};
2021-09-07 16:13:02 +02:00
struct blkdev_dio {
union {
struct kiocb * iocb ;
struct task_struct * waiter ;
} ;
size_t size ;
atomic_t ref ;
2021-10-14 11:17:43 -06:00
unsigned int flags ;
2021-10-15 16:55:05 -06:00
struct bio bio ____cacheline_aligned_in_smp ;
2021-09-07 16:13:02 +02:00
} ;
static struct bio_set blkdev_dio_pool ;
static void blkdev_bio_end_io ( struct bio * bio )
{
struct blkdev_dio * dio = bio - > bi_private ;
2021-10-14 11:17:43 -06:00
bool should_dirty = dio - > flags & DIO_SHOULD_DIRTY ;
2021-09-07 16:13:02 +02:00
if ( bio - > bi_status & & ! dio - > bio . bi_status )
dio - > bio . bi_status = bio - > bi_status ;
2021-10-27 13:21:09 +01:00
if ( atomic_dec_and_test ( & dio - > ref ) ) {
2021-10-14 11:17:43 -06:00
if ( ! ( dio - > flags & DIO_IS_SYNC ) ) {
2021-09-07 16:13:02 +02:00
struct kiocb * iocb = dio - > iocb ;
ssize_t ret ;
2021-10-12 13:12:24 +02:00
WRITE_ONCE ( iocb - > private , NULL ) ;
2021-09-07 16:13:02 +02:00
if ( likely ( ! dio - > bio . bi_status ) ) {
ret = dio - > size ;
iocb - > ki_pos + = ret ;
} else {
ret = blk_status_to_errno ( dio - > bio . bi_status ) ;
}
2021-10-21 09:22:35 -06:00
dio - > iocb - > ki_complete ( iocb , ret ) ;
2021-10-27 13:21:09 +01:00
bio_put ( & dio - > bio ) ;
2021-09-07 16:13:02 +02:00
} else {
struct task_struct * waiter = dio - > waiter ;
WRITE_ONCE ( dio - > waiter , NULL ) ;
blk_wake_io_task ( waiter ) ;
}
}
if ( should_dirty ) {
bio_check_pages_dirty ( bio ) ;
} else {
bio_release_pages ( bio , false ) ;
bio_put ( bio ) ;
}
}
static ssize_t __blkdev_direct_IO ( struct kiocb * iocb , struct iov_iter * iter ,
2024-04-15 12:20:20 +00:00
struct block_device * bdev , unsigned int nr_pages )
2021-09-07 16:13:02 +02:00
{
struct blk_plug plug ;
struct blkdev_dio * dio ;
struct bio * bio ;
bool is_read = ( iov_iter_rw ( iter ) = = READ ) , is_sync ;
2022-07-14 11:06:32 -07:00
blk_opf_t opf = is_read ? REQ_OP_READ : dio_bio_write_op ( iocb ) ;
2021-09-07 16:13:02 +02:00
loff_t pos = iocb - > ki_pos ;
int ret = 0 ;
2022-03-24 16:35:24 -04:00
if ( iocb - > ki_flags & IOCB_ALLOC_CACHE )
opf | = REQ_ALLOC_CACHE ;
bio = bio_alloc_bioset ( bdev , nr_pages , opf , GFP_KERNEL ,
& blkdev_dio_pool ) ;
2021-09-07 16:13:02 +02:00
dio = container_of ( bio , struct blkdev_dio , bio ) ;
2021-10-27 13:21:09 +01:00
atomic_set ( & dio - > ref , 1 ) ;
/*
* Grab an extra reference to ensure the dio structure which is embedded
* into the first bio stays around .
*/
bio_get ( bio ) ;
2021-10-14 11:17:43 -06:00
is_sync = is_sync_kiocb ( iocb ) ;
if ( is_sync ) {
dio - > flags = DIO_IS_SYNC ;
2021-09-07 16:13:02 +02:00
dio - > waiter = current ;
} else {
2021-10-14 11:17:43 -06:00
dio - > flags = 0 ;
2021-09-07 16:13:02 +02:00
dio - > iocb = iocb ;
}
dio - > size = 0 ;
2022-05-22 14:59:25 -04:00
if ( is_read & & user_backed_iter ( iter ) )
2021-10-14 11:17:43 -06:00
dio - > flags | = DIO_SHOULD_DIRTY ;
2021-09-07 16:13:02 +02:00
2021-10-27 13:21:08 +01:00
blk_start_plug ( & plug ) ;
2021-09-07 16:13:02 +02:00
for ( ; ; ) {
2021-10-20 20:00:50 +01:00
bio - > bi_iter . bi_sector = pos > > SECTOR_SHIFT ;
2024-02-02 12:39:25 -08:00
bio - > bi_write_hint = file_inode ( iocb - > ki_filp ) - > i_write_hint ;
2021-09-07 16:13:02 +02:00
bio - > bi_private = dio ;
bio - > bi_end_io = blkdev_bio_end_io ;
bio - > bi_ioprio = iocb - > ki_ioprio ;
ret = bio_iov_iter_get_pages ( bio , iter ) ;
if ( unlikely ( ret ) ) {
bio - > bi_status = BLK_STS_IOERR ;
bio_endio ( bio ) ;
break ;
}
2023-01-16 08:55:53 -07:00
if ( iocb - > ki_flags & IOCB_NOWAIT ) {
/*
* This is nonblocking IO , and we need to allocate
* another bio if we have data left to map . As we
* cannot guarantee that one of the sub bios will not
* fail getting issued FOR NOWAIT and as error results
* are coalesced across all of them , be safe and ask for
* a retry of this from blocking context .
*/
if ( unlikely ( iov_iter_count ( iter ) ) ) {
bio_release_pages ( bio , false ) ;
bio_clear_flag ( bio , BIO_REFFED ) ;
bio_put ( bio ) ;
blk_finish_plug ( & plug ) ;
return - EAGAIN ;
}
bio - > bi_opf | = REQ_NOWAIT ;
}
2021-09-07 16:13:02 +02:00
if ( is_read ) {
2021-10-14 11:17:43 -06:00
if ( dio - > flags & DIO_SHOULD_DIRTY )
2021-09-07 16:13:02 +02:00
bio_set_pages_dirty ( bio ) ;
} else {
task_io_account_write ( bio - > bi_iter . bi_size ) ;
}
dio - > size + = bio - > bi_iter . bi_size ;
pos + = bio - > bi_iter . bi_size ;
nr_pages = bio_iov_vecs_to_alloc ( iter , BIO_MAX_VECS ) ;
if ( ! nr_pages ) {
2021-10-12 13:12:24 +02:00
submit_bio ( bio ) ;
2021-09-07 16:13:02 +02:00
break ;
}
2021-10-27 13:21:09 +01:00
atomic_inc ( & dio - > ref ) ;
2021-09-07 16:13:02 +02:00
submit_bio ( bio ) ;
2022-01-24 10:11:05 +01:00
bio = bio_alloc ( bdev , nr_pages , opf , GFP_KERNEL ) ;
2021-09-07 16:13:02 +02:00
}
2021-10-27 13:21:08 +01:00
blk_finish_plug ( & plug ) ;
2021-09-07 16:13:02 +02:00
if ( ! is_sync )
return - EIOCBQUEUED ;
for ( ; ; ) {
set_current_state ( TASK_UNINTERRUPTIBLE ) ;
if ( ! READ_ONCE ( dio - > waiter ) )
break ;
2021-10-27 13:21:08 +01:00
blk_io_schedule ( ) ;
2021-09-07 16:13:02 +02:00
}
__set_current_state ( TASK_RUNNING ) ;
if ( ! ret )
ret = blk_status_to_errno ( dio - > bio . bi_status ) ;
if ( likely ( ! ret ) )
ret = dio - > size ;
bio_put ( & dio - > bio ) ;
return ret ;
}
2021-10-23 17:21:32 +01:00
static void blkdev_bio_end_io_async ( struct bio * bio )
{
struct blkdev_dio * dio = container_of ( bio , struct blkdev_dio , bio ) ;
struct kiocb * iocb = dio - > iocb ;
ssize_t ret ;
2022-02-11 10:01:36 +01:00
WRITE_ONCE ( iocb - > private , NULL ) ;
2021-10-23 17:21:32 +01:00
if ( likely ( ! bio - > bi_status ) ) {
ret = dio - > size ;
iocb - > ki_pos + = ret ;
} else {
ret = blk_status_to_errno ( bio - > bi_status ) ;
}
2021-11-01 10:17:11 -07:00
iocb - > ki_complete ( iocb , ret ) ;
2021-10-23 17:21:32 +01:00
if ( dio - > flags & DIO_SHOULD_DIRTY ) {
bio_check_pages_dirty ( bio ) ;
} else {
bio_release_pages ( bio , false ) ;
bio_put ( bio ) ;
}
}
static ssize_t __blkdev_direct_IO_async ( struct kiocb * iocb ,
struct iov_iter * iter ,
2024-04-15 12:20:20 +00:00
struct block_device * bdev ,
2021-10-23 17:21:32 +01:00
unsigned int nr_pages )
{
2022-01-24 10:11:04 +01:00
bool is_read = iov_iter_rw ( iter ) = = READ ;
2022-07-14 11:06:32 -07:00
blk_opf_t opf = is_read ? REQ_OP_READ : dio_bio_write_op ( iocb ) ;
2021-10-23 17:21:32 +01:00
struct blkdev_dio * dio ;
struct bio * bio ;
loff_t pos = iocb - > ki_pos ;
int ret = 0 ;
2022-03-24 16:35:24 -04:00
if ( iocb - > ki_flags & IOCB_ALLOC_CACHE )
opf | = REQ_ALLOC_CACHE ;
bio = bio_alloc_bioset ( bdev , nr_pages , opf , GFP_KERNEL ,
& blkdev_dio_pool ) ;
2021-10-23 17:21:32 +01:00
dio = container_of ( bio , struct blkdev_dio , bio ) ;
dio - > flags = 0 ;
dio - > iocb = iocb ;
bio - > bi_iter . bi_sector = pos > > SECTOR_SHIFT ;
2024-02-02 12:39:25 -08:00
bio - > bi_write_hint = file_inode ( iocb - > ki_filp ) - > i_write_hint ;
2021-10-23 17:21:32 +01:00
bio - > bi_end_io = blkdev_bio_end_io_async ;
bio - > bi_ioprio = iocb - > ki_ioprio ;
2021-10-27 13:21:07 +01:00
if ( iov_iter_is_bvec ( iter ) ) {
/*
* Users don ' t rely on the iterator being in any particular
* state for async I / O returning - EIOCBQUEUED , hence we can
* avoid expensive iov_iter_advance ( ) . Bypass
* bio_iov_iter_get_pages ( ) and set the bvec directly .
*/
bio_iov_bvec_set ( bio , iter ) ;
} else {
ret = bio_iov_iter_get_pages ( bio , iter ) ;
if ( unlikely ( ret ) ) {
2021-12-07 20:16:36 +00:00
bio_put ( bio ) ;
2021-10-27 13:21:07 +01:00
return ret ;
}
2021-10-23 17:21:32 +01:00
}
dio - > size = bio - > bi_iter . bi_size ;
2022-01-24 10:11:04 +01:00
if ( is_read ) {
2022-05-22 14:59:25 -04:00
if ( user_backed_iter ( iter ) ) {
2021-10-23 17:21:32 +01:00
dio - > flags | = DIO_SHOULD_DIRTY ;
bio_set_pages_dirty ( bio ) ;
}
} else {
task_io_account_write ( bio - > bi_iter . bi_size ) ;
}
2023-08-08 11:06:17 -06:00
if ( iocb - > ki_flags & IOCB_NOWAIT )
bio - > bi_opf | = REQ_NOWAIT ;
2021-10-23 17:21:32 +01:00
if ( iocb - > ki_flags & IOCB_HIPRI ) {
2023-08-08 11:06:17 -06:00
bio - > bi_opf | = REQ_POLLED ;
2021-10-23 17:21:32 +01:00
submit_bio ( bio ) ;
WRITE_ONCE ( iocb - > private , bio ) ;
} else {
submit_bio ( bio ) ;
}
return - EIOCBQUEUED ;
}
2021-09-07 16:13:02 +02:00
static ssize_t blkdev_direct_IO ( struct kiocb * iocb , struct iov_iter * iter )
{
2024-04-15 12:20:20 +00:00
struct block_device * bdev = I_BDEV ( iocb - > ki_filp - > f_mapping - > host ) ;
2021-09-07 16:13:02 +02:00
unsigned int nr_pages ;
if ( ! iov_iter_count ( iter ) )
return 0 ;
2024-04-15 12:20:20 +00:00
if ( blkdev_dio_unaligned ( bdev , iocb - > ki_pos , iter ) )
return - EINVAL ;
2021-09-07 16:13:02 +02:00
nr_pages = bio_iov_vecs_to_alloc ( iter , BIO_MAX_VECS + 1 ) ;
2021-10-23 17:21:32 +01:00
if ( likely ( nr_pages < = BIO_MAX_VECS ) ) {
if ( is_sync_kiocb ( iocb ) )
2024-04-15 12:20:20 +00:00
return __blkdev_direct_IO_simple ( iocb , iter , bdev ,
nr_pages ) ;
return __blkdev_direct_IO_async ( iocb , iter , bdev , nr_pages ) ;
2021-10-23 17:21:32 +01:00
}
2024-04-15 12:20:20 +00:00
return __blkdev_direct_IO ( iocb , iter , bdev , bio_max_segs ( nr_pages ) ) ;
2021-09-07 16:13:02 +02:00
}
2023-08-01 19:22:00 +02:00
/*
 * iomap_begin callback for block devices.
 *
 * Describes everything from @offset (rounded down to the logical block size)
 * up to the inode size as one IOMAP_MAPPED extent whose address equals its
 * file offset. Returns -EIO when @offset is at or beyond the inode size; in
 * that case iomap->bdev and iomap->offset have already been written.
 * @length, @flags and @srcmap are not used.
 */
static int blkdev_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
		unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
{
	struct block_device *bdev = I_BDEV(inode);
	const loff_t isize = i_size_read(inode);
	const loff_t aligned = ALIGN_DOWN(offset, bdev_logical_block_size(bdev));

	iomap->bdev = bdev;
	iomap->offset = aligned;
	if (offset >= isize)
		return -EIO;

	iomap->type = IOMAP_MAPPED;
	iomap->addr = aligned;
	iomap->length = isize - aligned;
	iomap->flags |= IOMAP_F_BUFFER_HEAD; /* noop for !CONFIG_BUFFER_HEAD */
	return 0;
}

/* iomap operations for block devices; only iomap_begin is provided. */
static const struct iomap_ops blkdev_iomap_ops = {
	.iomap_begin		= blkdev_iomap_begin,
};
2023-08-01 19:22:01 +02:00
# ifdef CONFIG_BUFFER_HEAD
/*
 * get_block callback: map @bh to block number @iblock on the inode's block
 * device, using the block number unchanged. @create is ignored. Always
 * returns 0.
 */
static int blkdev_get_block(struct inode *inode, sector_t iblock,
		struct buffer_head *bh, int create)
{
	struct block_device *bdev = I_BDEV(inode);

	bh->b_blocknr = iblock;
	bh->b_bdev = bdev;
	set_buffer_mapped(bh);
	return 0;
}
2023-12-15 20:02:44 +00:00
/*
* We cannot call mpage_writepages ( ) as it does not take the buffer lock .
* We must use block_write_full_folio ( ) directly which holds the buffer
* lock . The buffer lock provides the synchronisation with writeback
* that filesystems rely on when they use the blockdev ' s mapping .
*/
static int blkdev_writepages ( struct address_space * mapping ,
struct writeback_control * wbc )
2021-09-07 16:13:02 +02:00
{
2023-12-15 20:02:44 +00:00
struct blk_plug plug ;
int err ;
blk_start_plug ( & plug ) ;
err = write_cache_pages ( mapping , wbc , block_write_full_folio ,
blkdev_get_block ) ;
blk_finish_plug ( & plug ) ;
return err ;
2021-09-07 16:13:02 +02:00
}
2022-04-29 10:40:40 -04:00
static int blkdev_read_folio ( struct file * file , struct folio * folio )
2021-09-07 16:13:02 +02:00
{
2022-04-29 10:40:40 -04:00
return block_read_full_folio ( folio , blkdev_get_block ) ;
2021-09-07 16:13:02 +02:00
}
static void blkdev_readahead ( struct readahead_control * rac )
{
mpage_readahead ( rac , blkdev_get_block ) ;
}
static int blkdev_write_begin ( struct file * file , struct address_space * mapping ,
2022-02-22 14:31:43 -05:00
loff_t pos , unsigned len , struct page * * pagep , void * * fsdata )
2021-09-07 16:13:02 +02:00
{
2022-02-22 11:25:12 -05:00
return block_write_begin ( mapping , pos , len , pagep , blkdev_get_block ) ;
2021-09-07 16:13:02 +02:00
}
/*
 * write_end: finish the write via block_write_end(), then unlock @page and
 * drop a reference on it. Returns block_write_end()'s result unchanged.
 */
static int blkdev_write_end(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned copied, struct page *page,
		void *fsdata)
{
	const int rc = block_write_end(file, mapping, pos, len, copied, page,
				       fsdata);

	unlock_page(page);
	put_page(page);

	return rc;
}
const struct address_space_operations def_blk_aops = {
2022-02-09 20:22:12 +00:00
. dirty_folio = block_dirty_folio ,
2022-02-09 20:21:34 +00:00
. invalidate_folio = block_invalidate_folio ,
2022-04-29 10:40:40 -04:00
. read_folio = blkdev_read_folio ,
2021-09-07 16:13:02 +02:00
. readahead = blkdev_readahead ,
2023-12-15 20:02:44 +00:00
. writepages = blkdev_writepages ,
2021-09-07 16:13:02 +02:00
. write_begin = blkdev_write_begin ,
. write_end = blkdev_write_end ,
2022-06-06 10:20:31 -04:00
. migrate_folio = buffer_migrate_folio_norefs ,
2021-09-07 16:13:02 +02:00
. is_dirty_writeback = buffer_check_dirty_writeback ,
} ;
2023-08-01 19:22:01 +02:00
# else /* CONFIG_BUFFER_HEAD */
/* read_folio: read @folio through iomap using blkdev_iomap_ops; @file is unused. */
static int blkdev_read_folio(struct file *file, struct folio *folio)
{
	int ret = iomap_read_folio(folio, &blkdev_iomap_ops);

	return ret;
}
/* readahead: hand the request to iomap_readahead() with blkdev_iomap_ops. */
static void blkdev_readahead(struct readahead_control *rac)
{
	const struct iomap_ops *ops = &blkdev_iomap_ops;

	iomap_readahead(rac, ops);
}
/*
 * map_blocks callback for iomap writeback.
 *
 * Warns once and returns -EIO when @offset is at or beyond the inode size.
 * Returns 0 without remapping when the mapping cached in @wpc already covers
 * @offset; otherwise refreshes it through blkdev_iomap_begin(). @len is not
 * used.
 */
static int blkdev_map_blocks(struct iomap_writepage_ctx *wpc,
		struct inode *inode, loff_t offset, unsigned int len)
{
	const loff_t isize = i_size_read(inode);
	struct iomap *cached = &wpc->iomap;

	if (WARN_ON_ONCE(offset >= isize))
		return -EIO;

	/* Remap only when @offset falls outside the cached extent. */
	if (offset < cached->offset ||
	    offset >= cached->offset + cached->length)
		return blkdev_iomap_begin(inode, offset, isize - offset,
					  IOMAP_WRITE, cached, NULL);

	return 0;
}
/* iomap writeback operations; only map_blocks is provided. */
static const struct iomap_writeback_ops blkdev_writeback_ops = {
	.map_blocks		= blkdev_map_blocks,
};

/* writepages: run iomap writeback with a zero-initialised on-stack context. */
static int blkdev_writepages(struct address_space *mapping,
		struct writeback_control *wbc)
{
	struct iomap_writepage_ctx wpc = { };
	int ret;

	ret = iomap_writepages(mapping, wbc, &wpc, &blkdev_writeback_ops);
	return ret;
}
const struct address_space_operations def_blk_aops = {
. dirty_folio = filemap_dirty_folio ,
. release_folio = iomap_release_folio ,
. invalidate_folio = iomap_invalidate_folio ,
. read_folio = blkdev_read_folio ,
. readahead = blkdev_readahead ,
. writepages = blkdev_writepages ,
. is_partially_uptodate = iomap_is_partially_uptodate ,
2023-11-17 16:14:47 +00:00
. error_remove_folio = generic_error_remove_folio ,
2023-08-01 19:22:01 +02:00
. migrate_folio = filemap_migrate_folio ,
} ;
# endif /* CONFIG_BUFFER_HEAD */
2021-09-07 16:13:02 +02:00
/*
 * For a block special file, file_inode(file)->i_size is zero, so seek
 * against the size of the inode behind the file's mapping instead. That
 * inode is locked while the new position is computed.
 */
static loff_t blkdev_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *bd_inode = bdev_file_inode(file);
	loff_t new_pos;

	inode_lock(bd_inode);
	new_pos = fixed_size_llseek(file, offset, whence,
				    i_size_read(bd_inode));
	inode_unlock(bd_inode);

	return new_pos;
}
/*
 * fsync: write back and wait on the range [@start, @end], then flush the
 * device. -EOPNOTSUPP from the flush is reported as success. @datasync is
 * not used.
 */
static int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
		int datasync)
{
	struct block_device *bdev = I_BDEV(filp->f_mapping->host);
	int error;

	error = file_write_and_wait_range(filp, start, end);
	if (error)
		return error;

	/*
	 * There is no need to serialise calls to blkdev_issue_flush with
	 * i_mutex, and doing so causes performance issues with concurrent
	 * O_SYNC writers to a block device.
	 */
	error = blkdev_issue_flush(bdev);

	return error == -EOPNOTSUPP ? 0 : error;
}
2023-09-27 11:34:08 +02:00
/**
* file_to_blk_mode - get block open flags from file flags
* @ file : file whose open flags should be converted
*
* Look at file open flags and generate corresponding block open flags from
* them . The function works both for file just being open ( e . g . during - > open
* callback ) and for file that is already open . This is actually non - trivial
* ( see comment in the function ) .
*/
2023-06-08 13:02:55 +02:00
blk_mode_t file_to_blk_mode ( struct file * file )
{
blk_mode_t mode = 0 ;
if ( file - > f_mode & FMODE_READ )
mode | = BLK_OPEN_READ ;
if ( file - > f_mode & FMODE_WRITE )
mode | = BLK_OPEN_WRITE ;
2023-09-27 11:34:08 +02:00
/*
2024-01-23 14:26:49 +01:00
* do_dentry_open ( ) clears O_EXCL from f_flags , use file - > private_data
* to determine whether the open was exclusive for already open files .
2023-09-27 11:34:08 +02:00
*/
2024-01-23 14:26:49 +01:00
if ( file - > private_data )
mode | = BLK_OPEN_EXCL ;
2023-09-27 11:34:08 +02:00
else if ( file - > f_flags & O_EXCL )
2023-06-08 13:02:55 +02:00
mode | = BLK_OPEN_EXCL ;
if ( file - > f_flags & O_NDELAY )
mode | = BLK_OPEN_NDELAY ;
/*
* If all bits in O_ACCMODE set ( aka O_RDWR | O_WRONLY ) , the floppy
* driver has historically allowed ioctls as if the file was opened for
* writing , but does not allow and actual reads or writes .
*/
if ( ( file - > f_flags & O_ACCMODE ) = = ( O_RDWR | O_WRONLY ) )
mode | = BLK_OPEN_WRITE_IOCTL ;
return mode ;
}
2021-09-07 16:13:02 +02:00
static int blkdev_open ( struct inode * inode , struct file * filp )
{
2024-01-23 14:26:46 +01:00
struct block_device * bdev ;
2023-09-27 11:34:08 +02:00
blk_mode_t mode ;
2024-01-23 14:26:46 +01:00
int ret ;
2021-09-07 16:13:02 +02:00
2023-09-27 11:34:08 +02:00
mode = file_to_blk_mode ( filp ) ;
2024-01-23 14:26:49 +01:00
/* Use the file as the holder. */
if ( mode & BLK_OPEN_EXCL )
filp - > private_data = filp ;
ret = bdev_permission ( inode - > i_rdev , mode , filp - > private_data ) ;
2024-01-23 14:26:46 +01:00
if ( ret )
return ret ;
bdev = blkdev_get_no_open ( inode - > i_rdev ) ;
if ( ! bdev )
return - ENXIO ;
2024-01-23 14:26:49 +01:00
ret = bdev_open ( bdev , mode , filp - > private_data , NULL , filp ) ;
2024-01-23 14:26:46 +01:00
if ( ret )
blkdev_put_no_open ( bdev ) ;
return ret ;
2021-09-07 16:13:02 +02:00
}
2023-06-08 13:02:38 +02:00
/* release: hand @filp to bdev_release(); @inode is unused. Always returns 0. */
static int blkdev_release(struct inode *inode, struct file *filp)
{
	bdev_release(filp);

	return 0;
}
2023-08-01 19:21:58 +02:00
static ssize_t
blkdev_direct_write ( struct kiocb * iocb , struct iov_iter * from )
{
size_t count = iov_iter_count ( from ) ;
ssize_t written ;
written = kiocb_invalidate_pages ( iocb , count ) ;
if ( written ) {
if ( written = = - EBUSY )
return 0 ;
return written ;
}
written = blkdev_direct_IO ( iocb , from ) ;
if ( written > 0 ) {
kiocb_invalidate_post_direct_write ( iocb , count ) ;
iocb - > ki_pos + = written ;
count - = written ;
}
if ( written ! = - EIOCBQUEUED )
iov_iter_revert ( from , count - iov_iter_count ( from ) ) ;
return written ;
}
2023-08-01 19:22:00 +02:00
/* Buffered write through iomap using blkdev_iomap_ops. */
static ssize_t blkdev_buffered_write(struct kiocb *iocb, struct iov_iter *from)
{
	const struct iomap_ops *ops = &blkdev_iomap_ops;

	return iomap_file_buffered_write(iocb, from, ops);
}
2021-09-07 16:13:02 +02:00
/*
* Write data to the block device . Only intended for the block device itself
* and the raw driver which basically is a fake block device .
*
* Does not take i_mutex for the write and thus is not for general purpose
* use .
*/
static ssize_t blkdev_write_iter ( struct kiocb * iocb , struct iov_iter * from )
{
2023-08-01 19:21:58 +02:00
struct file * file = iocb - > ki_filp ;
struct block_device * bdev = I_BDEV ( file - > f_mapping - > host ) ;
2021-10-13 09:57:11 +01:00
struct inode * bd_inode = bdev - > bd_inode ;
2021-11-04 15:13:17 -06:00
loff_t size = bdev_nr_bytes ( bdev ) ;
2021-09-07 16:13:02 +02:00
size_t shorted = 0 ;
ssize_t ret ;
2021-10-13 09:57:11 +01:00
if ( bdev_read_only ( bdev ) )
2021-09-07 16:13:02 +02:00
return - EPERM ;
if ( IS_SWAPFILE ( bd_inode ) & & ! is_hibernate_resume_dev ( bd_inode - > i_rdev ) )
return - ETXTBSY ;
if ( ! iov_iter_count ( from ) )
return 0 ;
if ( iocb - > ki_pos > = size )
return - ENOSPC ;
if ( ( iocb - > ki_flags & ( IOCB_NOWAIT | IOCB_DIRECT ) ) = = IOCB_NOWAIT )
return - EOPNOTSUPP ;
size - = iocb - > ki_pos ;
if ( iov_iter_count ( from ) > size ) {
shorted = iov_iter_count ( from ) - size ;
iov_iter_truncate ( from , size ) ;
}
2023-08-01 19:21:58 +02:00
ret = file_update_time ( file ) ;
if ( ret )
return ret ;
if ( iocb - > ki_flags & IOCB_DIRECT ) {
ret = blkdev_direct_write ( iocb , from ) ;
if ( ret > = 0 & & iov_iter_count ( from ) )
ret = direct_write_fallback ( iocb , from , ret ,
2023-08-01 19:22:00 +02:00
blkdev_buffered_write ( iocb , from ) ) ;
2023-08-01 19:21:58 +02:00
} else {
2023-08-01 19:22:00 +02:00
ret = blkdev_buffered_write ( iocb , from ) ;
2023-08-01 19:21:58 +02:00
}
2021-09-07 16:13:02 +02:00
if ( ret > 0 )
ret = generic_write_sync ( iocb , ret ) ;
iov_iter_reexpand ( from , iov_iter_count ( from ) + shorted ) ;
return ret ;
}
static ssize_t blkdev_read_iter ( struct kiocb * iocb , struct iov_iter * to )
{
2023-06-08 13:02:56 +02:00
struct block_device * bdev = I_BDEV ( iocb - > ki_filp - > f_mapping - > host ) ;
2021-11-04 15:13:17 -06:00
loff_t size = bdev_nr_bytes ( bdev ) ;
2021-09-07 16:13:02 +02:00
loff_t pos = iocb - > ki_pos ;
size_t shorted = 0 ;
2021-10-28 08:57:09 -06:00
ssize_t ret = 0 ;
2022-02-01 11:04:20 +01:00
size_t count ;
2021-09-07 16:13:02 +02:00
2022-02-01 11:04:20 +01:00
if ( unlikely ( pos + iov_iter_count ( to ) > size ) ) {
2021-10-20 20:00:48 +01:00
if ( pos > = size )
return 0 ;
size - = pos ;
2022-02-01 11:04:20 +01:00
shorted = iov_iter_count ( to ) - size ;
iov_iter_truncate ( to , size ) ;
2021-09-07 16:13:02 +02:00
}
2022-02-01 11:04:20 +01:00
count = iov_iter_count ( to ) ;
if ( ! count )
goto reexpand ; /* skip atime */
2021-10-28 08:57:09 -06:00
if ( iocb - > ki_flags & IOCB_DIRECT ) {
2023-06-01 16:58:56 +02:00
ret = kiocb_write_and_wait ( iocb , count ) ;
if ( ret < 0 )
goto reexpand ;
2021-10-28 08:57:09 -06:00
file_accessed ( iocb - > ki_filp ) ;
ret = blkdev_direct_IO ( iocb , to ) ;
if ( ret > = 0 ) {
iocb - > ki_pos + = ret ;
count - = ret ;
}
2022-02-01 11:04:20 +01:00
iov_iter_revert ( to , count - iov_iter_count ( to ) ) ;
2021-10-28 08:57:09 -06:00
if ( ret < 0 | | ! count )
2022-02-01 11:04:20 +01:00
goto reexpand ;
2021-10-28 08:57:09 -06:00
}
ret = filemap_read ( iocb , to , ret ) ;
2021-10-20 20:00:48 +01:00
2022-02-01 11:04:20 +01:00
reexpand :
2021-10-20 20:00:48 +01:00
if ( unlikely ( shorted ) )
iov_iter_reexpand ( to , iov_iter_count ( to ) + shorted ) ;
2021-09-07 16:13:02 +02:00
return ret ;
}
/* fallocate() mode bits accepted by blkdev_fallocate(). */
#define	BLKDEV_FALLOC_FL_SUPPORTED					\
		(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |		\
		 FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)

/*
 * fallocate() for block devices.
 *
 * Supported mode combinations:
 *   ZERO_RANGE [| KEEP_SIZE]                   zeroout with BLKDEV_ZERO_NOUNMAP
 *   PUNCH_HOLE | KEEP_SIZE                     zeroout with BLKDEV_ZERO_NOFALLBACK
 *   PUNCH_HOLE | KEEP_SIZE | NO_HIDE_STALE     discard
 *
 * Returns 0 on success; -EOPNOTSUPP for unknown flags or an unsupported
 * combination; -EINVAL when the range starts beyond the device, extends past
 * it without KEEP_SIZE, or is not aligned to the logical block size; or the
 * error from truncate_bdev_range() or the issued operation.
 */
static long blkdev_fallocate(struct file *file, int mode, loff_t start,
			     loff_t len)
{
	struct inode *inode = bdev_file_inode(file);
	struct block_device *bdev = I_BDEV(inode);
	loff_t end = start + len - 1;
	unsigned int zeroout_flags = 0;
	bool discard = false;
	loff_t isize;
	int error;

	/* Fail if we don't recognize the flags. */
	if (mode & ~BLKDEV_FALLOC_FL_SUPPORTED)
		return -EOPNOTSUPP;

	/* Don't go off the end of the device. */
	isize = bdev_nr_bytes(bdev);
	if (start >= isize)
		return -EINVAL;
	if (end >= isize) {
		if (!(mode & FALLOC_FL_KEEP_SIZE))
			return -EINVAL;
		len = isize - start;
		end = start + len - 1;
	}

	/*
	 * Don't allow IO that isn't aligned to logical block size.
	 */
	if ((start | len) & (bdev_logical_block_size(bdev) - 1))
		return -EINVAL;

	filemap_invalidate_lock(inode->i_mapping);

	/* Decide which operation the mode selects; reject everything else. */
	switch (mode) {
	case FALLOC_FL_ZERO_RANGE:
	case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE:
		zeroout_flags = BLKDEV_ZERO_NOUNMAP;
		break;
	case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
		zeroout_flags = BLKDEV_ZERO_NOFALLBACK;
		break;
	case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
		discard = true;
		break;
	default:
		error = -EOPNOTSUPP;
		goto fail;
	}

	/*
	 * Invalidate the page cache, including dirty pages, for valid
	 * de-allocate mode calls to fallocate().
	 */
	error = truncate_bdev_range(bdev, file_to_blk_mode(file), start, end);
	if (error)
		goto fail;

	if (discard)
		error = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
					     len >> SECTOR_SHIFT, GFP_KERNEL);
	else
		error = blkdev_issue_zeroout(bdev, start >> SECTOR_SHIFT,
					     len >> SECTOR_SHIFT, GFP_KERNEL,
					     zeroout_flags);

 fail:
	filemap_invalidate_unlock(inode->i_mapping);
	return error;
}
2023-05-10 09:42:23 +02:00
/*
 * mmap: use generic_file_readonly_mmap() when the device is read-only and
 * generic_file_mmap() otherwise.
 */
static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct inode *bd_inode = bdev_file_inode(file);

	if (!bdev_read_only(I_BDEV(bd_inode)))
		return generic_file_mmap(file, vma);

	return generic_file_readonly_mmap(file, vma);
}
2021-09-07 16:13:02 +02:00
const struct file_operations def_blk_fops = {
. open = blkdev_open ,
2023-06-08 13:02:38 +02:00
. release = blkdev_release ,
2021-09-07 16:13:02 +02:00
. llseek = blkdev_llseek ,
. read_iter = blkdev_read_iter ,
. write_iter = blkdev_write_iter ,
2021-10-12 13:12:24 +02:00
. iopoll = iocb_bio_iopoll ,
2023-05-10 09:42:23 +02:00
. mmap = blkdev_mmap ,
2021-09-07 16:13:02 +02:00
. fsync = blkdev_fsync ,
2021-10-12 12:44:50 +02:00
. unlocked_ioctl = blkdev_ioctl ,
2021-09-07 16:13:02 +02:00
# ifdef CONFIG_COMPAT
. compat_ioctl = compat_blkdev_ioctl ,
# endif
2023-05-22 14:50:15 +01:00
. splice_read = filemap_splice_read ,
2021-09-07 16:13:02 +02:00
. splice_write = iter_file_splice_write ,
. fallocate = blkdev_fallocate ,
2024-03-28 13:27:24 +01:00
. fop_flags = FOP_BUFFER_RASYNC ,
2021-09-07 16:13:02 +02:00
} ;
/*
 * Initialise blkdev_dio_pool with a pool size of 4. The front padding is the
 * offset of the embedded bio inside struct blkdev_dio.
 */
static __init int blkdev_init(void)
{
	const unsigned int front_pad = offsetof(struct blkdev_dio, bio);
	const int flags = BIOSET_NEED_BVECS | BIOSET_PERCPU_CACHE;

	return bioset_init(&blkdev_dio_pool, 4, front_pad, flags);
}
module_init(blkdev_init);