/*
 * Functions related to barrier IO handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>

#include "blk.h"

/**
 * blk_queue_ordered - does this queue support ordered writes
 * @q:        the request queue
 * @ordered:  one of QUEUE_ORDERED_*
 * @prepare_flush_fn: rq setup helper for cache flush ordered writes
 *
 * Description:
 *   For journalled file systems, doing ordered writes on a commit
 *   block instead of explicitly doing wait_on_buffer (which is bad
 *   for performance) can be a big win. Block drivers supporting this
 *   feature should call this function and indicate so.
 *
 **/
int blk_queue_ordered(struct request_queue *q, unsigned ordered,
		      prepare_flush_fn *prepare_flush_fn)
{
	if (!prepare_flush_fn && (ordered & (QUEUE_ORDERED_DO_PREFLUSH |
					     QUEUE_ORDERED_DO_POSTFLUSH))) {
		printk(KERN_ERR "%s: prepare_flush_fn required\n", __func__);
		return -EINVAL;
	}

	if (ordered != QUEUE_ORDERED_NONE &&
	    ordered != QUEUE_ORDERED_DRAIN &&
	    ordered != QUEUE_ORDERED_DRAIN_FLUSH &&
	    ordered != QUEUE_ORDERED_DRAIN_FUA &&
	    ordered != QUEUE_ORDERED_TAG &&
	    ordered != QUEUE_ORDERED_TAG_FLUSH &&
	    ordered != QUEUE_ORDERED_TAG_FUA) {
		printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered);
		return -EINVAL;
	}

	q->ordered = ordered;
	q->next_ordered = ordered;
	q->prepare_flush_fn = prepare_flush_fn;

	return 0;
}
EXPORT_SYMBOL(blk_queue_ordered);

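/*
 * Example usage (a sketch): a driver with a volatile write cache would
 * typically register a drain+flush ordered mode together with a
 * prepare_flush_fn that turns the pre/post flush requests into a cache
 * flush command.  my_prepare_flush(), MY_FLUSH_CMD and MY_FLUSH_TIMEOUT
 * below are hypothetical names, not part of any driver:
 *
 *	static void my_prepare_flush(struct request_queue *q,
 *				     struct request *rq)
 *	{
 *		rq->cmd_type = REQ_TYPE_BLOCK_PC;
 *		rq->timeout = MY_FLUSH_TIMEOUT;
 *		rq->cmd_len = 1;
 *		rq->cmd[0] = MY_FLUSH_CMD;
 *	}
 *
 *	blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH, my_prepare_flush);
 *
 * A write-through device that needs no flushing would instead pass
 * QUEUE_ORDERED_DRAIN and a NULL prepare_flush_fn.
 */
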
/*
 * Cache flushing for ordered writes handling
 */
unsigned blk_ordered_cur_seq(struct request_queue *q)
{
	if (!q->ordseq)
		return 0;
	return 1 << ffz(q->ordseq);
}

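/*
 * A worked example of the bitmask logic above, assuming the usual
 * QUEUE_ORDSEQ_* bit values (STARTED 0x01, DRAIN 0x02, PREFLUSH 0x04,
 * BAR 0x08, POSTFLUSH 0x10, DONE 0x20): once STARTED and DRAIN have
 * completed, q->ordseq is 0x03, ffz() finds bit 2 and
 * blk_ordered_cur_seq() returns 0x04, i.e. the sequence is currently
 * at the PREFLUSH stage.
 */
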
unsigned blk_ordered_req_seq(struct request *rq)
{
	struct request_queue *q = rq->q;

	BUG_ON(q->ordseq == 0);

	if (rq == &q->pre_flush_rq)
		return QUEUE_ORDSEQ_PREFLUSH;
	if (rq == &q->bar_rq)
		return QUEUE_ORDSEQ_BAR;
	if (rq == &q->post_flush_rq)
		return QUEUE_ORDSEQ_POSTFLUSH;

	/*
	 * !fs requests don't need to follow barrier ordering.  Always
	 * put them at the front.  This fixes the following deadlock.
	 *
	 * http://thread.gmane.org/gmane.linux.kernel/537473
	 */
	if (!blk_fs_request(rq))
		return QUEUE_ORDSEQ_DRAIN;

	if ((rq->cmd_flags & REQ_ORDERED_COLOR) ==
	    (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR))
		return QUEUE_ORDSEQ_DRAIN;
	else
		return QUEUE_ORDSEQ_DONE;
}

bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
{
	struct request *rq;

	if (error && !q->orderr)
		q->orderr = error;

	BUG_ON(q->ordseq & seq);
	q->ordseq |= seq;

	if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
		return false;

	/*
	 * Okay, sequence complete.
	 */
	q->ordseq = 0;
	rq = q->orig_bar_rq;
	__blk_end_request_all(rq, q->orderr);
	return true;
}

static void pre_flush_end_io(struct request *rq, int error)
{
	elv_completed_request(rq->q, rq);
	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
}

static void bar_end_io(struct request *rq, int error)
{
	elv_completed_request(rq->q, rq);
	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
}

static void post_flush_end_io(struct request *rq, int error)
{
	elv_completed_request(rq->q, rq);
	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
}

static void queue_flush(struct request_queue *q, unsigned which)
{
	struct request *rq;
	rq_end_io_fn *end_io;

	if (which == QUEUE_ORDERED_DO_PREFLUSH) {
		rq = &q->pre_flush_rq;
		end_io = pre_flush_end_io;
	} else {
		rq = &q->post_flush_rq;
		end_io = post_flush_end_io;
	}

	blk_rq_init(q, rq);
	rq->cmd_flags = REQ_HARDBARRIER;
	rq->rq_disk = q->bar_rq.rq_disk;
	rq->end_io = end_io;
	q->prepare_flush_fn(q, rq);

	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
}

static inline bool start_ordered(struct request_queue *q, struct request **rqp)
{
	struct request *rq = *rqp;
	unsigned skip = 0;

	q->orderr = 0;
	q->ordered = q->next_ordered;
	q->ordseq |= QUEUE_ORDSEQ_STARTED;

	/*
	 * For an empty barrier, there's no actual BAR request, which
	 * in turn makes POSTFLUSH unnecessary.  Mask them off.
	 */
	if (!blk_rq_sectors(rq)) {
		q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
				QUEUE_ORDERED_DO_POSTFLUSH);
		/*
		 * Empty barrier on a write-through device w/ ordered
		 * tag has no command to issue and without any command
		 * to issue, ordering by tag can't be used.  Drain
		 * instead.
		 */
		if ((q->ordered & QUEUE_ORDERED_BY_TAG) &&
		    !(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) {
			q->ordered &= ~QUEUE_ORDERED_BY_TAG;
			q->ordered |= QUEUE_ORDERED_BY_DRAIN;
		}
	}

	/* stash away the original request */
	blk_dequeue_request(rq);
	q->orig_bar_rq = rq;
	rq = NULL;

	/*
	 * Queue ordered sequence.  As we stack them at the head, we
	 * need to queue in reverse order.  Note that we rely on that
	 * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
	 * request gets in between the ordered sequence.
	 */
	if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) {
		queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH);
		rq = &q->post_flush_rq;
	} else
		skip |= QUEUE_ORDSEQ_POSTFLUSH;

	if (q->ordered & QUEUE_ORDERED_DO_BAR) {
		rq = &q->bar_rq;

		/* initialize proxy request and queue it */
		blk_rq_init(q, rq);
		if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
			rq->cmd_flags |= REQ_RW;
		if (q->ordered & QUEUE_ORDERED_DO_FUA)
			rq->cmd_flags |= REQ_FUA;
		init_request_from_bio(rq, q->orig_bar_rq->bio);
		rq->end_io = bar_end_io;

		elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
	} else
		skip |= QUEUE_ORDSEQ_BAR;

	if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) {
		queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH);
		rq = &q->pre_flush_rq;
	} else
		skip |= QUEUE_ORDSEQ_PREFLUSH;

	if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && queue_in_flight(q))
		rq = NULL;
	else
		skip |= QUEUE_ORDSEQ_DRAIN;

	*rqp = rq;

	/*
	 * Complete skipped sequences.  If whole sequence is complete,
	 * return false to tell elevator that this request is gone.
	 */
	return !blk_ordered_complete_seq(q, skip, 0);
}

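/*
 * Illustration (a sketch of the common QUEUE_ORDERED_DRAIN_FLUSH case,
 * where PREFLUSH, BAR and POSTFLUSH are all enabled): the three
 * ELEVATOR_INSERT_FRONT insertions above leave the queue head as
 *
 *	pre_flush_rq -> bar_rq -> post_flush_rq -> ...
 *
 * so the proxy requests are dispatched in PREFLUSH, BAR, POSTFLUSH
 * order even though they were queued in the reverse order.
 */
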
bool blk_do_ordered(struct request_queue *q, struct request **rqp)
{
	struct request *rq = *rqp;
	const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);

	if (!q->ordseq) {
		if (!is_barrier)
			return true;

		if (q->next_ordered != QUEUE_ORDERED_NONE)
			return start_ordered(q, rqp);
		else {
			/*
			 * Queue ordering not supported.  Terminate
			 * with prejudice.
			 */
			blk_dequeue_request(rq);
			__blk_end_request_all(rq, -EOPNOTSUPP);
			*rqp = NULL;
			return false;
		}
	}

	/*
	 * Ordered sequence in progress
	 */

	/* Special requests are not subject to ordering rules. */
	if (!blk_fs_request(rq) &&
	    rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
		return true;

	if (q->ordered & QUEUE_ORDERED_BY_TAG) {
		/* Ordered by tag.  Blocking the next barrier is enough. */
		if (is_barrier && rq != &q->bar_rq)
			*rqp = NULL;
	} else {
		/* Ordered by draining.  Wait for turn. */
		WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
		if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
			*rqp = NULL;
	}

	return true;
}

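/*
 * Example caller (a sketch of the usual dispatch-side usage, assuming
 * the __elv_next_request() loop shape in blk.h): every candidate
 * request is passed through blk_do_ordered(), and a *rqp that comes
 * back NULL holds the queue until the ordered sequence advances, while
 * a false return means the request is gone and the loop moves on.
 *
 *	while (!list_empty(&q->queue_head)) {
 *		rq = list_entry_rq(q->queue_head.next);
 *		if (blk_do_ordered(q, &rq))
 *			return rq;
 *	}
 */
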
static void bio_end_empty_barrier(struct bio *bio, int err)
{
	if (err) {
		if (err == -EOPNOTSUPP)
			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
		clear_bit(BIO_UPTODATE, &bio->bi_flags);
	}

	complete(bio->bi_private);
}

/**
 * blkdev_issue_flush - queue a flush
 * @bdev:	blockdev to issue flush for
 * @error_sector:	error sector
 *
 * Description:
 *    Issue a flush for the block device in question. Caller can supply
 *    room for storing the error offset in case of a flush error, if they
 *    wish to.
 */
int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
{
	DECLARE_COMPLETION_ONSTACK(wait);
	struct request_queue *q;
	struct bio *bio;
	int ret;

	if (bdev->bd_disk == NULL)
		return -ENXIO;

	q = bdev_get_queue(bdev);
	if (!q)
		return -ENXIO;

	bio = bio_alloc(GFP_KERNEL, 0);
	bio->bi_end_io = bio_end_empty_barrier;
	bio->bi_private = &wait;
	bio->bi_bdev = bdev;
	submit_bio(WRITE_BARRIER, bio);

	wait_for_completion(&wait);

	/*
	 * The driver must store the error location in ->bi_sector, if
	 * it supports it. For non-stacked drivers, this should be copied
	 * from blk_rq_pos(rq).
	 */
	if (error_sector)
		*error_sector = bio->bi_sector;

	ret = 0;
	if (bio_flagged(bio, BIO_EOPNOTSUPP))
		ret = -EOPNOTSUPP;
	else if (!bio_flagged(bio, BIO_UPTODATE))
		ret = -EIO;

	bio_put(bio);
	return ret;
}
EXPORT_SYMBOL(blkdev_issue_flush);

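/*
 * Example usage (a sketch; the filesystem-side names are illustrative):
 * a filesystem that has just written its commit block and wants the
 * device write cache drained before declaring the transaction durable
 * could do
 *
 *	int err = blkdev_issue_flush(sb->s_bdev, NULL);
 *	if (err == -EOPNOTSUPP)
 *		err = 0;
 *
 * -EOPNOTSUPP means barriers are not supported and is usually safe to
 * ignore; passing NULL for error_sector simply skips reporting the
 * failing offset.
 */
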
static void blkdev_discard_end_io(struct bio *bio, int err)
{
	if (err) {
		if (err == -EOPNOTSUPP)
			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
		clear_bit(BIO_UPTODATE, &bio->bi_flags);
	}

	if (bio->bi_private)
		complete(bio->bi_private);
	__free_page(bio_page(bio));

	bio_put(bio);
}

/**
 * blkdev_issue_discard - queue a discard
 * @bdev:	blockdev to issue discard for
 * @sector:	start sector
 * @nr_sects:	number of sectors to discard
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @flags:	DISCARD_FL_* flags to control behaviour
 *
 * Description:
 *    Issue a discard request for the sectors in question.
 */
int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
			 sector_t nr_sects, gfp_t gfp_mask, int flags)
{
	DECLARE_COMPLETION_ONSTACK(wait);
	struct request_queue *q = bdev_get_queue(bdev);
	int type = flags & DISCARD_FL_BARRIER ?
		DISCARD_BARRIER : DISCARD_NOBARRIER;
	struct bio *bio;
	struct page *page;
	int ret = 0;

	if (!q)
		return -ENXIO;

	if (!blk_queue_discard(q))
		return -EOPNOTSUPP;

	while (nr_sects && !ret) {
		unsigned int sector_size = q->limits.logical_block_size;
		unsigned int max_discard_sectors =
			min(q->limits.max_discard_sectors, UINT_MAX >> 9);

		bio = bio_alloc(gfp_mask, 1);
		if (!bio)
			goto out;
		bio->bi_sector = sector;
		bio->bi_end_io = blkdev_discard_end_io;
		bio->bi_bdev = bdev;
		if (flags & DISCARD_FL_WAIT)
			bio->bi_private = &wait;

		/*
		 * Add a zeroed one-sector payload as that's what
		 * our current implementations need.  If we'll ever need
		 * more the interface will need revisiting.
		 */
		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
		if (!page)
			goto out_free_bio;
		if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size)
			goto out_free_page;

		/*
		 * And override the bio size - the way discard works we
		 * touch many more blocks on disk than the actual payload
		 * length.
		 */
		if (nr_sects > max_discard_sectors) {
			bio->bi_size = max_discard_sectors << 9;
			nr_sects -= max_discard_sectors;
			sector += max_discard_sectors;
		} else {
			bio->bi_size = nr_sects << 9;
			nr_sects = 0;
		}

		bio_get(bio);
		submit_bio(type, bio);

		if (flags & DISCARD_FL_WAIT)
			wait_for_completion(&wait);

		if (bio_flagged(bio, BIO_EOPNOTSUPP))
			ret = -EOPNOTSUPP;
		else if (!bio_flagged(bio, BIO_UPTODATE))
			ret = -EIO;
		bio_put(bio);
	}
	return ret;
out_free_page:
	__free_page(page);
out_free_bio:
	bio_put(bio);
out:
	return -ENOMEM;
}
EXPORT_SYMBOL(blkdev_issue_discard);

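/*
 * Example usage (a sketch; the filesystem-side names are illustrative):
 * a filesystem freeing an extent on an SSD or thinly provisioned device
 * could issue a synchronous, barrier-ordered discard for it with
 *
 *	ret = blkdev_issue_discard(sb->s_bdev, start_sector, nr_sectors,
 *				   GFP_NOFS,
 *				   DISCARD_FL_BARRIER | DISCARD_FL_WAIT);
 *
 * Without DISCARD_FL_WAIT the call returns as soon as the bios have
 * been submitted; -EOPNOTSUPP means the queue does not advertise
 * discard support and is usually safe to ignore.
 */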