/*
 * Functions related to segment and merge handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/scatterlist.h>

#include "blk.h"
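
/*
 * Walk the bio_vecs of @bio (and any bios chained behind it) and count the
 * physical segments they will need.  When the queue supports clustering,
 * adjacent vectors are folded into one segment as long as the combined
 * length stays within queue_max_segment_size() and the pair passes the
 * BIOVEC_PHYS_MERGEABLE()/BIOVEC_SEG_BOUNDARY() checks.  For example, two
 * physically adjacent 4KiB bio_vecs count as a single 8KiB segment on a
 * clustering queue, provided 8KiB fits within the segment size limit.  The
 * sizes of the first and last segment are recorded in bi_seg_front_size and
 * bi_seg_back_size so that blk_phys_contig_segment() below can later decide
 * whether two neighbouring bios may share a segment.
 */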
static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
                                             struct bio *bio)
{
        struct bio_vec *bv, *bvprv = NULL;
        int cluster, i, high, highprv = 1;
        unsigned int seg_size, nr_phys_segs;
        struct bio *fbio, *bbio;

        if (!bio)
                return 0;

        fbio = bio;
        cluster = blk_queue_cluster(q);
        seg_size = 0;
        nr_phys_segs = 0;
        for_each_bio(bio) {
                bio_for_each_segment(bv, bio, i) {
                        /*
                         * the trick here is making sure that a high page is
                         * never considered part of another segment, since
                         * that might change with the bounce page.
                         */
                        high = page_to_pfn(bv->bv_page) > queue_bounce_pfn(q);
                        if (high || highprv)
                                goto new_segment;
                        if (cluster) {
                                if (seg_size + bv->bv_len
                                    > queue_max_segment_size(q))
                                        goto new_segment;
                                if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv))
                                        goto new_segment;
                                if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv))
                                        goto new_segment;

                                seg_size += bv->bv_len;
                                bvprv = bv;
                                continue;
                        }
new_segment:
                        if (nr_phys_segs == 1 && seg_size >
                            fbio->bi_seg_front_size)
                                fbio->bi_seg_front_size = seg_size;

                        nr_phys_segs++;
                        bvprv = bv;
                        seg_size = bv->bv_len;
                        highprv = high;
                }
                bbio = bio;
        }

        if (nr_phys_segs == 1 && seg_size > fbio->bi_seg_front_size)
                fbio->bi_seg_front_size = seg_size;
        if (seg_size > bbio->bi_seg_back_size)
                bbio->bi_seg_back_size = seg_size;

        return nr_phys_segs;
}

void blk_recalc_rq_segments(struct request *rq)
{
        rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio);
}

void blk_recount_segments(struct request_queue *q, struct bio *bio)
{
        struct bio *nxt = bio->bi_next;

        bio->bi_next = NULL;
        bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio);
        bio->bi_next = nxt;
        bio->bi_flags |= (1 << BIO_SEG_VALID);
}
EXPORT_SYMBOL(blk_recount_segments);
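
/*
 * Can the last segment of @bio and the first segment of @nxt share one
 * physical segment?  Requires clustering, a combined size that stays within
 * queue_max_segment_size(), physical contiguity of the two boundary
 * bio_vecs, and that the pair does not straddle the queue's segment
 * boundary mask.
 */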
static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
                                   struct bio *nxt)
{
        if (!blk_queue_cluster(q))
                return 0;

        if (bio->bi_seg_back_size + nxt->bi_seg_front_size >
            queue_max_segment_size(q))
                return 0;

        if (!bio_has_data(bio))
                return 1;

        if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
                return 0;

        /*
         * bio and nxt are contiguous in memory; check if the queue allows
         * these two to be merged into one
         */
        if (BIO_SEG_BOUNDARY(q, bio, nxt))
                return 1;

        return 0;
}

/*
 * map a request to scatterlist, return number of sg entries setup. Caller
 * must make sure sg can hold rq->nr_phys_segments entries
 */
int blk_rq_map_sg(struct request_queue *q, struct request *rq,
                  struct scatterlist *sglist)
{
        struct bio_vec *bvec, *bvprv;
        struct req_iterator iter;
        struct scatterlist *sg;
        int nsegs, cluster;

        nsegs = 0;
        cluster = blk_queue_cluster(q);

        /*
         * for each bio in rq
         */
        bvprv = NULL;
        sg = NULL;
        rq_for_each_segment(bvec, rq, iter) {
                int nbytes = bvec->bv_len;

                if (bvprv && cluster) {
                        if (sg->length + nbytes > queue_max_segment_size(q))
                                goto new_segment;

                        if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec))
                                goto new_segment;
                        if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec))
                                goto new_segment;

                        sg->length += nbytes;
                } else {
new_segment:
                        if (!sg)
                                sg = sglist;
                        else {
                                /*
                                 * If the driver previously mapped a shorter
                                 * list, we could see a termination bit
                                 * prematurely unless it fully inits the sg
                                 * table on each mapping. We KNOW that there
                                 * must be more entries here or the driver
                                 * would be buggy, so force clear the
                                 * termination bit to avoid doing a full
                                 * sg_init_table() in drivers for each command.
                                 */
                                sg->page_link &= ~0x02;
                                sg = sg_next(sg);
                        }

                        sg_set_page(sg, bvec->bv_page, nbytes, bvec->bv_offset);
                        nsegs++;
                }
                bvprv = bvec;
        } /* segments in rq */

        if (unlikely(rq->cmd_flags & REQ_COPY_USER) &&
            (blk_rq_bytes(rq) & q->dma_pad_mask)) {
                unsigned int pad_len =
                        (q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1;

                sg->length += pad_len;
                rq->extra_len += pad_len;
        }

        if (q->dma_drain_size && q->dma_drain_needed(rq)) {
                if (rq->cmd_flags & REQ_WRITE)
                        memset(q->dma_drain_buffer, 0, q->dma_drain_size);

                sg->page_link &= ~0x02;
                sg = sg_next(sg);
                sg_set_page(sg, virt_to_page(q->dma_drain_buffer),
                            q->dma_drain_size,
                            ((unsigned long)q->dma_drain_buffer) &
                            (PAGE_SIZE - 1));
                nsegs++;
                rq->extra_len += q->dma_drain_size;
        }

        if (sg)
                sg_mark_end(sg);

        return nsegs;
}
EXPORT_SYMBOL(blk_rq_map_sg);
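
/*
 * Illustrative sketch (not part of the original source): a typical way a
 * block driver might consume blk_rq_map_sg() in its request handling path.
 * "hw->sg_table", "hw->dev" and "count" are hypothetical driver-private
 * names, and error handling is omitted:
 *
 *      sg_init_table(hw->sg_table, queue_max_segments(q));
 *      count = blk_rq_map_sg(q, rq, hw->sg_table);
 *      count = dma_map_sg(hw->dev, hw->sg_table, count,
 *                         rq_data_dir(rq) == READ ?
 *                         DMA_FROM_DEVICE : DMA_TO_DEVICE);
 *
 * Because blk_rq_map_sg() force-clears a stale termination bit when it
 * reuses an entry (see the comment in the function above), the driver only
 * needs to sg_init_table() the table once at setup time, not before every
 * command.
 */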

static inline int ll_new_hw_segment(struct request_queue *q,
                                    struct request *req,
                                    struct bio *bio)
{
        int nr_phys_segs = bio_phys_segments(q, bio);

        if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q))
                goto no_merge;

        if (bio_integrity(bio) && blk_integrity_merge_bio(q, req, bio))
                goto no_merge;

        /*
         * This will form the start of a new hw segment.  Bump both
         * counters.
         */
        req->nr_phys_segments += nr_phys_segs;
        return 1;

no_merge:
        req->cmd_flags |= REQ_NOMERGE;
        if (req == q->last_merge)
                q->last_merge = NULL;
        return 0;
}
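
/*
 * ll_back_merge_fn() and ll_front_merge_fn() decide whether @bio may be
 * appended after req->biotail or prepended in front of req->bio,
 * respectively.  Both reject the merge when the resulting request would
 * exceed the queue's sector limit, then recount any stale segment counts
 * and let ll_new_hw_segment() enforce the segment limits.
 */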
int ll_back_merge_fn(struct request_queue *q, struct request *req,
                     struct bio *bio)
{
        unsigned short max_sectors;

        if (unlikely(req->cmd_type == REQ_TYPE_BLOCK_PC))
                max_sectors = queue_max_hw_sectors(q);
        else
                max_sectors = queue_max_sectors(q);

        if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) {
                req->cmd_flags |= REQ_NOMERGE;
                if (req == q->last_merge)
                        q->last_merge = NULL;
                return 0;
        }
        if (!bio_flagged(req->biotail, BIO_SEG_VALID))
                blk_recount_segments(q, req->biotail);
        if (!bio_flagged(bio, BIO_SEG_VALID))
                blk_recount_segments(q, bio);

        return ll_new_hw_segment(q, req, bio);
}

int ll_front_merge_fn(struct request_queue *q, struct request *req,
                      struct bio *bio)
{
        unsigned short max_sectors;

        if (unlikely(req->cmd_type == REQ_TYPE_BLOCK_PC))
                max_sectors = queue_max_hw_sectors(q);
        else
                max_sectors = queue_max_sectors(q);

        if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) {
                req->cmd_flags |= REQ_NOMERGE;
                if (req == q->last_merge)
                        q->last_merge = NULL;
                return 0;
        }
        if (!bio_flagged(bio, BIO_SEG_VALID))
                blk_recount_segments(q, bio);
        if (!bio_flagged(req->bio, BIO_SEG_VALID))
                blk_recount_segments(q, req->bio);

        return ll_new_hw_segment(q, req, bio);
}
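
/*
 * Decide whether two whole requests can be combined.  Besides the sector
 * and segment limits, this accounts for the boundary segments: when the
 * tail of @req and the head of @next are physically contiguous they
 * collapse into one segment, so the total is decremented and the merged
 * front/back segment sizes are propagated to the affected bios.
 */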
static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
                                struct request *next)
{
        int total_phys_segments;
        unsigned int seg_size =
                req->biotail->bi_seg_back_size + next->bio->bi_seg_front_size;

        /*
         * First check if either of the requests is a re-queued request.
         * Can't merge them if they are.
         */
        if (req->special || next->special)
                return 0;

        /*
         * Will it become too large?
         */
        if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > queue_max_sectors(q))
                return 0;

        total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
        if (blk_phys_contig_segment(q, req->biotail, next->bio)) {
                if (req->nr_phys_segments == 1)
                        req->bio->bi_seg_front_size = seg_size;
                if (next->nr_phys_segments == 1)
                        next->biotail->bi_seg_back_size = seg_size;
                total_phys_segments--;
        }

        if (total_phys_segments > queue_max_segments(q))
                return 0;

        if (blk_integrity_rq(req) && blk_integrity_merge_rq(q, req, next))
                return 0;

        /* Merge is OK... */
        req->nr_phys_segments = total_phys_segments;
        return 1;
}

/**
 * blk_rq_set_mixed_merge - mark a request as mixed merge
 * @rq: request to mark as mixed merge
 *
 * Description:
 *     @rq is about to be mixed merged.  Make sure the attributes
 *     which can be mixed are set in each bio and mark @rq as mixed
 *     merged.
 */
void blk_rq_set_mixed_merge(struct request *rq)
{
        unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
        struct bio *bio;

        if (rq->cmd_flags & REQ_MIXED_MERGE)
                return;

        /*
         * @rq will no longer represent mixable attributes for all the
         * contained bios.  It will just track those of the first one.
         * Distribute the attributes to each bio.
         */
        for (bio = rq->bio; bio; bio = bio->bi_next) {
                WARN_ON_ONCE((bio->bi_rw & REQ_FAILFAST_MASK) &&
                             (bio->bi_rw & REQ_FAILFAST_MASK) != ff);
                bio->bi_rw |= ff;
        }
        rq->cmd_flags |= REQ_MIXED_MERGE;
}
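
/*
 * Accounting for a request that is being dissolved into another one: it
 * will never go through normal completion, so drop it from the partition's
 * in-flight count and fold the elapsed time into the per-partition
 * statistics here.
 */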
static void blk_account_io_merge(struct request *req)
{
        if (blk_do_io_stat(req)) {
                struct hd_struct *part;
                int cpu;

                cpu = part_stat_lock();
                part = req->part;

                part_round_stats(cpu, part);
                part_dec_in_flight(part, rq_data_dir(req));

                hd_struct_put(part);
                part_stat_unlock();
        }
}

/*
 * Has to be called with the request spinlock acquired
 */
static int attempt_merge(struct request_queue *q, struct request *req,
                         struct request *next)
{
        if (!rq_mergeable(req) || !rq_mergeable(next))
                return 0;

        /*
         * Don't merge file system requests and discard requests
         */
        if ((req->cmd_flags & REQ_DISCARD) != (next->cmd_flags & REQ_DISCARD))
                return 0;

        /*
         * Don't merge discard requests and secure discard requests
         */
        if ((req->cmd_flags & REQ_SECURE) != (next->cmd_flags & REQ_SECURE))
                return 0;

        /*
         * not contiguous
         */
        if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next))
                return 0;

        if (rq_data_dir(req) != rq_data_dir(next)
            || req->rq_disk != next->rq_disk
            || next->special)
                return 0;

        /*
         * If we are allowed to merge, then append bio list
         * from next to rq and release next. merge_requests_fn
         * will have updated segment counts, update sector
         * counts here.
         */
        if (!ll_merge_requests_fn(q, req, next))
                return 0;

        /*
         * If failfast settings disagree or any of the two is already
         * a mixed merge, mark both as mixed before proceeding.  This
         * makes sure that all involved bios have mixable attributes
         * set properly.
         */
        if ((req->cmd_flags | next->cmd_flags) & REQ_MIXED_MERGE ||
            (req->cmd_flags & REQ_FAILFAST_MASK) !=
            (next->cmd_flags & REQ_FAILFAST_MASK)) {
                blk_rq_set_mixed_merge(req);
                blk_rq_set_mixed_merge(next);
        }

        /*
         * At this point we have either done a back merge
         * or front merge. We need the smaller start_time of
         * the merged requests to be the current request
         * for accounting purposes.
         */
        if (time_after(req->start_time, next->start_time))
                req->start_time = next->start_time;

        req->biotail->bi_next = next->bio;
        req->biotail = next->biotail;

        req->__data_len += blk_rq_bytes(next);

        elv_merge_requests(q, req, next);

        /*
         * 'next' is going away, so update stats accordingly
         */
        blk_account_io_merge(next);

        req->ioprio = ioprio_best(req->ioprio, next->ioprio);
        if (blk_rq_cpu_valid(next))
                req->cpu = next->cpu;

        /* ownership of bio passed from next to req */
        next->bio = NULL;
        __blk_put_request(q, next);
        return 1;
}
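
/*
 * The helpers below ask the I/O scheduler for the request that follows or
 * precedes @rq on disk (elv_latter_request()/elv_former_request()) and try
 * to coalesce the two; blk_attempt_req_merge() exposes the same
 * request-to-request merge to callers outside this file.
 */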
int attempt_back_merge(struct request_queue *q, struct request *rq)
{
        struct request *next = elv_latter_request(q, rq);

        if (next)
                return attempt_merge(q, rq, next);

        return 0;
}

int attempt_front_merge(struct request_queue *q, struct request *rq)
{
        struct request *prev = elv_former_request(q, rq);

        if (prev)
                return attempt_merge(q, prev, rq);

        return 0;
}

int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
                          struct request *next)
{
        return attempt_merge(q, rq, next);
}