2005-04-17 02:20:36 +04:00
/*
2006-09-04 17:41:16 +04:00
* Copyright ( C ) 2001 Jens Axboe < axboe @ kernel . dk >
2005-04-17 02:20:36 +04:00
*
* This program is free software ; you can redistribute it and / or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
*
* You should have received a copy of the GNU General Public License
* along with this program ; if not , write to the Free Software
* Foundation , Inc . , 59 Temple Place , Suite 330 , Boston , MA 02111-1307 USA
*
*/
# include <linux/mm.h>
# include <linux/swap.h>
# include <linux/bio.h>
# include <linux/blkdev.h>
2012-03-06 01:15:27 +04:00
# include <linux/iocontext.h>
2005-04-17 02:20:36 +04:00
# include <linux/slab.h>
# include <linux/init.h>
# include <linux/kernel.h>
2011-11-17 08:57:37 +04:00
# include <linux/export.h>
2005-04-17 02:20:36 +04:00
# include <linux/mempool.h>
# include <linux/workqueue.h>
2012-03-06 01:15:27 +04:00
# include <linux/cgroup.h>
2005-06-20 16:06:52 +04:00
# include <scsi/sg.h> /* for struct sg_iovec */
2005-04-17 02:20:36 +04:00
tracing/events: convert block trace points to TRACE_EVENT()
TRACE_EVENT is a more generic way to define tracepoints. Doing so adds
these new capabilities to this tracepoint:
- zero-copy and per-cpu splice() tracing
- binary tracing without printf overhead
- structured logging records exposed under /debug/tracing/events
- trace events embedded in function tracer output and other plugins
- user-defined, per tracepoint filter expressions
...
Cons:
- no dev_t info for the output of plug, unplug_timer and unplug_io events.
no dev_t info for getrq and sleeprq events if bio == NULL.
no dev_t info for rq_abort,...,rq_requeue events if rq->rq_disk == NULL.
This is mainly because we can't get the device from a request queue.
But this may change in the future.
- A packet command is converted to a string in TP_assign, not TP_print,
whereas blktrace does the conversion just before output.
Since pc requests should be rather rare, this is not a big issue.
- In blktrace, an event can have 2 different print formats, but a TRACE_EVENT
has a unique format, which means we have some unused data in a trace entry.
The overhead is minimized by using __dynamic_array() instead of __array().
I've benchmarked the ioctl blktrace vs the splice based TRACE_EVENT tracing:
dd dd + ioctl blktrace dd + TRACE_EVENT (splice)
1 7.36s, 42.7 MB/s 7.50s, 42.0 MB/s 7.41s, 42.5 MB/s
2 7.43s, 42.3 MB/s 7.48s, 42.1 MB/s 7.43s, 42.4 MB/s
3 7.38s, 42.6 MB/s 7.45s, 42.2 MB/s 7.41s, 42.5 MB/s
So the overhead of tracing is very small, and no regression when using
those trace events vs blktrace.
And the binary output of TRACE_EVENT is much smaller than blktrace:
# ls -l -h
-rw-r--r-- 1 root root 8.8M 06-09 13:24 sda.blktrace.0
-rw-r--r-- 1 root root 195K 06-09 13:24 sda.blktrace.1
-rw-r--r-- 1 root root 2.7M 06-09 13:25 trace_splice.out
Following are some comparisons between TRACE_EVENT and blktrace:
plug:
kjournald-480 [000] 303.084981: block_plug: [kjournald]
kjournald-480 [000] 303.084981: 8,0 P N [kjournald]
unplug_io:
kblockd/0-118 [000] 300.052973: block_unplug_io: [kblockd/0] 1
kblockd/0-118 [000] 300.052974: 8,0 U N [kblockd/0] 1
remap:
kjournald-480 [000] 303.085042: block_remap: 8,0 W 102736992 + 8 <- (8,8) 33384
kjournald-480 [000] 303.085043: 8,0 A W 102736992 + 8 <- (8,8) 33384
bio_backmerge:
kjournald-480 [000] 303.085086: block_bio_backmerge: 8,0 W 102737032 + 8 [kjournald]
kjournald-480 [000] 303.085086: 8,0 M W 102737032 + 8 [kjournald]
getrq:
kjournald-480 [000] 303.084974: block_getrq: 8,0 W 102736984 + 8 [kjournald]
kjournald-480 [000] 303.084975: 8,0 G W 102736984 + 8 [kjournald]
bash-2066 [001] 1072.953770: 8,0 G N [bash]
bash-2066 [001] 1072.953773: block_getrq: 0,0 N 0 + 0 [bash]
rq_complete:
konsole-2065 [001] 300.053184: block_rq_complete: 8,0 W () 103669040 + 16 [0]
konsole-2065 [001] 300.053191: 8,0 C W 103669040 + 16 [0]
ksoftirqd/1-7 [001] 1072.953811: 8,0 C N (5a 00 08 00 00 00 00 00 24 00) [0]
ksoftirqd/1-7 [001] 1072.953813: block_rq_complete: 0,0 N (5a 00 08 00 00 00 00 00 24 00) 0 + 0 [0]
rq_insert:
kjournald-480 [000] 303.084985: block_rq_insert: 8,0 W 0 () 102736984 + 8 [kjournald]
kjournald-480 [000] 303.084986: 8,0 I W 102736984 + 8 [kjournald]
Changelog from v2 -> v3:
- use the newly introduced __dynamic_array().
Changelog from v1 -> v2:
- use __string() instead of __array() to minimize the memory required
to store hex dump of rq->cmd().
- support large pc requests.
- add missing blk_fill_rwbs_rq() in block_rq_requeue TRACE_EVENT.
- some cleanups.
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <4A2DF669.5070905@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
2009-06-09 09:43:05 +04:00
# include <trace/events/block.h>
2008-11-26 13:59:56 +03:00
2008-12-23 14:42:54 +03:00
/*
* Number of bio_vecs that bio_alloc_bioset ( ) treats as embedded in a bio
* taken from a bio_set : requests for up to this many vecs use
* bio - > bi_inline_vecs instead of a separate biovec allocation , so a small
* bio needs one mempool call rather than two .
*/
# define BIO_INLINE_VECS 4
2008-10-09 10:57:05 +04:00
/*
* NOTE ( review ) : presumably the mempool backing bio splitting ; it is not
* referenced in the part of the file shown here - confirm against its users .
*/
static mempool_t * bio_split_pool __read_mostly ;
2005-04-17 02:20:36 +04:00
/*
* Table of biovec pools , one entry per supported vector count , in
* ascending order . The index into this table is what bvec_alloc_bs ( )
* hands back through its idx argument .
*
* If you change this list , also change the switch in bvec_alloc_bs ( ) or
* things will break badly ! An entry cannot be bigger than what you can fit
* into an unsigned short .
*/
# define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
2011-03-08 10:28:01 +03:00
static struct biovec_slab bvec_slabs [ BIOVEC_NR_POOLS ] __read_mostly = {
2005-04-17 02:20:36 +04:00
BV ( 1 ) , BV ( 4 ) , BV ( 16 ) , BV ( 64 ) , BV ( 128 ) , BV ( BIO_MAX_PAGES ) ,
} ;
# undef BV
/*
* fs_bio_set is the shared bio_set , holding the bio and iovec memory pools ,
* for IO code that does not need private memory pools of its own . It is
* exported so that modules can allocate from it as well .
*/
2008-06-17 20:59:56 +04:00
struct bio_set * fs_bio_set ;
2012-09-07 02:35:01 +04:00
EXPORT_SYMBOL ( fs_bio_set ) ;
2005-04-17 02:20:36 +04:00
2008-12-10 17:35:05 +03:00
/*
* Our slab pool management
*
* Each struct bio_slab describes one kmem_cache of bios of a given size .
* Caches are shared between bio_sets that need the same size and are
* reference counted ; see bio_find_or_create_slab ( ) and bio_put_slab ( ) .
*/
struct bio_slab {
/* the cache itself ; NULL marks a free , reusable table slot */
struct kmem_cache * slab ;
/* number of users holding this cache */
unsigned int slab_ref ;
/* object size : sizeof ( struct bio ) plus the caller ' s extra bytes */
unsigned int slab_size ;
/* cache name , formatted from the slot index */
char name [ 8 ] ;
} ;
/* serialises every access to the slab table below */
static DEFINE_MUTEX ( bio_slab_lock ) ;
/* the slab table ; grown with krealloc ( ) when it fills up */
static struct bio_slab * bio_slabs ;
/* bio_slab_nr : slots handed out so far ; bio_slab_max : allocated capacity */
static unsigned int bio_slab_nr , bio_slab_max ;
static struct kmem_cache * bio_find_or_create_slab ( unsigned int extra_size )
{
unsigned int sz = sizeof ( struct bio ) + extra_size ;
struct kmem_cache * slab = NULL ;
2012-08-09 17:19:25 +04:00
struct bio_slab * bslab , * new_bio_slabs ;
2012-10-22 23:53:36 +04:00
unsigned int new_bio_slab_max ;
2008-12-10 17:35:05 +03:00
unsigned int i , entry = - 1 ;
mutex_lock ( & bio_slab_lock ) ;
i = 0 ;
while ( i < bio_slab_nr ) {
2010-01-19 16:07:09 +03:00
bslab = & bio_slabs [ i ] ;
2008-12-10 17:35:05 +03:00
if ( ! bslab - > slab & & entry = = - 1 )
entry = i ;
else if ( bslab - > slab_size = = sz ) {
slab = bslab - > slab ;
bslab - > slab_ref + + ;
break ;
}
i + + ;
}
if ( slab )
goto out_unlock ;
if ( bio_slab_nr = = bio_slab_max & & entry = = - 1 ) {
2012-10-22 23:53:36 +04:00
new_bio_slab_max = bio_slab_max < < 1 ;
2012-08-09 17:19:25 +04:00
new_bio_slabs = krealloc ( bio_slabs ,
2012-10-22 23:53:36 +04:00
new_bio_slab_max * sizeof ( struct bio_slab ) ,
2012-08-09 17:19:25 +04:00
GFP_KERNEL ) ;
if ( ! new_bio_slabs )
2008-12-10 17:35:05 +03:00
goto out_unlock ;
2012-10-22 23:53:36 +04:00
bio_slab_max = new_bio_slab_max ;
2012-08-09 17:19:25 +04:00
bio_slabs = new_bio_slabs ;
2008-12-10 17:35:05 +03:00
}
if ( entry = = - 1 )
entry = bio_slab_nr + + ;
bslab = & bio_slabs [ entry ] ;
snprintf ( bslab - > name , sizeof ( bslab - > name ) , " bio-%d " , entry ) ;
slab = kmem_cache_create ( bslab - > name , sz , 0 , SLAB_HWCACHE_ALIGN , NULL ) ;
if ( ! slab )
goto out_unlock ;
2011-03-23 02:33:54 +03:00
printk ( KERN_INFO " bio: create slab <%s> at %d \n " , bslab - > name , entry ) ;
2008-12-10 17:35:05 +03:00
bslab - > slab = slab ;
bslab - > slab_ref = 1 ;
bslab - > slab_size = sz ;
out_unlock :
mutex_unlock ( & bio_slab_lock ) ;
return slab ;
}
static void bio_put_slab ( struct bio_set * bs )
{
struct bio_slab * bslab = NULL ;
unsigned int i ;
mutex_lock ( & bio_slab_lock ) ;
for ( i = 0 ; i < bio_slab_nr ; i + + ) {
if ( bs - > bio_slab = = bio_slabs [ i ] . slab ) {
bslab = & bio_slabs [ i ] ;
break ;
}
}
if ( WARN ( ! bslab , KERN_ERR " bio: unable to find slab! \n " ) )
goto out ;
WARN_ON ( ! bslab - > slab_ref ) ;
if ( - - bslab - > slab_ref )
goto out ;
kmem_cache_destroy ( bslab - > slab ) ;
bslab - > slab = NULL ;
out :
mutex_unlock ( & bio_slab_lock ) ;
}
2008-06-30 22:04:41 +04:00
unsigned int bvec_nr_vecs ( unsigned short idx )
{
return bvec_slabs [ idx ] . nr_vecs ;
}
2008-12-10 17:35:05 +03:00
void bvec_free_bs ( struct bio_set * bs , struct bio_vec * bv , unsigned int idx )
{
BIO_BUG_ON ( idx > = BIOVEC_NR_POOLS ) ;
if ( idx = = BIOVEC_MAX_IDX )
mempool_free ( bv , bs - > bvec_pool ) ;
else {
struct biovec_slab * bvs = bvec_slabs + idx ;
kmem_cache_free ( bvs - > slab , bv ) ;
}
}
2008-12-11 13:53:43 +03:00
/*
 * Allocate a biovec array with room for at least @nr entries.
 *
 * On success *@idx is set to the index of the pool the array came from, for
 * use by bvec_free_bs().  Returns NULL if @nr is below 1 or above
 * BIO_MAX_PAGES (*@idx is then left untouched), or if the allocation fails.
 */
struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx,
			      struct bio_set *bs)
{
	struct biovec_slab *pool;
	struct bio_vec *vecs;
	gfp_t slab_gfp;

	/*
	 * Map the request onto the smallest pool that fits; the thresholds
	 * mirror the entries of the bvec_slabs table.
	 */
	if (nr < 1)
		return NULL;
	else if (nr == 1)
		*idx = 0;
	else if (nr <= 4)
		*idx = 1;
	else if (nr <= 16)
		*idx = 2;
	else if (nr <= 64)
		*idx = 3;
	else if (nr <= 128)
		*idx = 4;
	else if (nr <= BIO_MAX_PAGES)
		*idx = 5;
	else
		return NULL;

	/* only the BIOVEC_MAX_IDX pool is mempool backed */
	if (*idx == BIOVEC_MAX_IDX)
		return mempool_alloc(bs->bvec_pool, gfp_mask);

	pool = bvec_slabs + *idx;

	/*
	 * Keep the slab attempt restricted and quiet about failures, since
	 * there is a mempool to fall back on.
	 */
	slab_gfp = gfp_mask & ~(__GFP_WAIT | __GFP_IO);
	slab_gfp |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;

	vecs = kmem_cache_alloc(pool->slab, slab_gfp);
	if (unlikely(!vecs && (gfp_mask & __GFP_WAIT))) {
		/* the caller may wait: retry from the mempool-backed pool */
		*idx = BIOVEC_MAX_IDX;
		vecs = mempool_alloc(bs->bvec_pool, gfp_mask);
	}

	return vecs;
}
2012-09-07 02:35:00 +04:00
/*
 * Release what hangs off @bio without freeing the bio itself: its task
 * association and, if present, its integrity payload.
 */
static void __bio_free(struct bio *bio)
{
	bio_disassociate_task(bio);

	if (!bio_integrity(bio))
		return;

	bio_integrity_free(bio);
}
2008-06-30 22:04:41 +04:00
2012-09-07 02:35:00 +04:00
static void bio_free ( struct bio * bio )
{
struct bio_set * bs = bio - > bi_pool ;
void * p ;
__bio_free ( bio ) ;
if ( bs ) {
if ( bio_has_allocated_vec ( bio ) )
bvec_free_bs ( bs , bio - > bi_io_vec , BIO_POOL_IDX ( bio ) ) ;
/*
* If we have front padding , adjust the bio pointer before freeing
*/
p = bio ;
2008-12-10 17:35:05 +03:00
p - = bs - > front_pad ;
2012-09-07 02:35:00 +04:00
mempool_free ( p , bs - > bio_pool ) ;
} else {
/* Bio was allocated by bio_kmalloc() */
kfree ( bio ) ;
}
2005-09-07 02:16:42 +04:00
}
2006-01-15 00:20:43 +03:00
void bio_init ( struct bio * bio )
2005-04-17 02:20:36 +04:00
{
2007-07-18 15:14:03 +04:00
memset ( bio , 0 , sizeof ( * bio ) ) ;
2005-04-17 02:20:36 +04:00
bio - > bi_flags = 1 < < BIO_UPTODATE ;
atomic_set ( & bio - > bi_cnt , 1 ) ;
}
2009-09-26 18:19:21 +04:00
EXPORT_SYMBOL ( bio_init ) ;
2005-04-17 02:20:36 +04:00
2012-09-07 02:34:58 +04:00
/**
* bio_reset - reinitialize a bio
* @ bio : bio to reset
*
* Description :
* After calling bio_reset ( ) , @ bio will be in the same state as a freshly
* allocated bio returned bio bio_alloc_bioset ( ) - the only fields that are
* preserved are the ones that are initialized by bio_alloc_bioset ( ) . See
* comment in struct bio .
*/
void bio_reset ( struct bio * bio )
{
unsigned long flags = bio - > bi_flags & ( ~ 0UL < < BIO_RESET_BITS ) ;
2012-09-07 02:35:00 +04:00
__bio_free ( bio ) ;
2012-09-07 02:34:58 +04:00
memset ( bio , 0 , BIO_RESET_BYTES ) ;
bio - > bi_flags = flags | ( 1 < < BIO_UPTODATE ) ;
}
EXPORT_SYMBOL ( bio_reset ) ;
2005-04-17 02:20:36 +04:00
/**
* bio_alloc_bioset - allocate a bio for I / O
* @ gfp_mask : the GFP_ mask given to the slab allocator
* @ nr_iovecs : number of iovecs to pre - allocate
2010-01-15 13:05:07 +03:00
* @ bs : the bio_set to allocate from .
2005-04-17 02:20:36 +04:00
*
* Description :
2012-09-07 02:35:01 +04:00
* If @ bs is NULL , uses kmalloc ( ) to allocate the bio ; else the allocation is
* backed by the @ bs ' s mempool .
*
* When @ bs is not NULL , if % __GFP_WAIT is set then bio_alloc will always be
* able to allocate a bio . This is due to the mempool guarantees . To make this
* work , callers must never allocate more than 1 bio at a time from this pool .
* Callers that need to allocate more than 1 bio must always submit the
* previously allocated bio for IO before attempting to allocate a new one .
* Failure to do so can cause deadlocks under memory pressure .
*
* RETURNS :
* Pointer to new bio on success , NULL on failure .
*/
2005-10-07 10:46:04 +04:00
struct bio * bio_alloc_bioset ( gfp_t gfp_mask , int nr_iovecs , struct bio_set * bs )
2005-04-17 02:20:36 +04:00
{
2012-09-07 02:35:01 +04:00
unsigned front_pad ;
unsigned inline_vecs ;
2009-04-15 21:50:51 +04:00
unsigned long idx = BIO_POOL_NONE ;
2009-02-21 13:16:36 +03:00
struct bio_vec * bvl = NULL ;
2009-04-15 21:50:51 +04:00
struct bio * bio ;
void * p ;
2012-09-07 02:35:01 +04:00
if ( ! bs ) {
if ( nr_iovecs > UIO_MAXIOV )
return NULL ;
p = kmalloc ( sizeof ( struct bio ) +
nr_iovecs * sizeof ( struct bio_vec ) ,
gfp_mask ) ;
front_pad = 0 ;
inline_vecs = nr_iovecs ;
} else {
p = mempool_alloc ( bs - > bio_pool , gfp_mask ) ;
front_pad = bs - > front_pad ;
inline_vecs = BIO_INLINE_VECS ;
}
2009-04-15 21:50:51 +04:00
if ( unlikely ( ! p ) )
return NULL ;
2005-04-17 02:20:36 +04:00
2012-09-07 02:35:01 +04:00
bio = p + front_pad ;
2009-02-21 13:16:36 +03:00
bio_init ( bio ) ;
2012-09-07 02:35:01 +04:00
if ( nr_iovecs > inline_vecs ) {
2009-02-21 13:16:36 +03:00
bvl = bvec_alloc_bs ( gfp_mask , nr_iovecs , & idx , bs ) ;
if ( unlikely ( ! bvl ) )
goto err_free ;
2012-09-07 02:35:01 +04:00
} else if ( nr_iovecs ) {
bvl = bio - > bi_inline_vecs ;
2005-04-17 02:20:36 +04:00
}
2012-09-07 02:35:01 +04:00
bio - > bi_pool = bs ;
2009-02-21 13:16:36 +03:00
bio - > bi_flags | = idx < < BIO_POOL_OFFSET ;
bio - > bi_max_vecs = nr_iovecs ;
bio - > bi_io_vec = bvl ;
2005-04-17 02:20:36 +04:00
return bio ;
2009-02-21 13:16:36 +03:00
err_free :
2009-04-15 21:50:51 +04:00
mempool_free ( p , bs - > bio_pool ) ;
2009-02-21 13:16:36 +03:00
return NULL ;
2005-04-17 02:20:36 +04:00
}
2009-09-26 18:19:21 +04:00
EXPORT_SYMBOL ( bio_alloc_bioset ) ;
2005-04-17 02:20:36 +04:00
/*
 * Overwrite with zeroes the data of every segment of @bio that
 * bio_for_each_segment() visits, flushing the dcache for each page touched.
 */
void zero_fill_bio(struct bio *bio)
{
	struct bio_vec *seg;
	unsigned long irq_flags;
	int n;

	bio_for_each_segment(seg, bio, n) {
		char *kaddr = bvec_kmap_irq(seg, &irq_flags);

		memset(kaddr, 0, seg->bv_len);
		flush_dcache_page(seg->bv_page);
		bvec_kunmap_irq(kaddr, &irq_flags);
	}
}
EXPORT_SYMBOL(zero_fill_bio);
/**
* bio_put - release a reference to a bio
* @ bio : bio to release reference to
*
* Description :
* Put a reference to a & struct bio , either one you have gotten with
2009-11-02 13:39:22 +03:00
* bio_alloc , bio_get or bio_clone . The last put of a bio will free it .
2005-04-17 02:20:36 +04:00
* */
void bio_put ( struct bio * bio )
{
BIO_BUG_ON ( ! atomic_read ( & bio - > bi_cnt ) ) ;
/*
* last put frees it
*/
2012-09-07 02:35:00 +04:00
if ( atomic_dec_and_test ( & bio - > bi_cnt ) )
bio_free ( bio ) ;
2005-04-17 02:20:36 +04:00
}
2009-09-26 18:19:21 +04:00
EXPORT_SYMBOL ( bio_put ) ;
2005-04-17 02:20:36 +04:00
2007-07-24 11:28:11 +04:00
inline int bio_phys_segments ( struct request_queue * q , struct bio * bio )
2005-04-17 02:20:36 +04:00
{
if ( unlikely ( ! bio_flagged ( bio , BIO_SEG_VALID ) ) )
blk_recount_segments ( q , bio ) ;
return bio - > bi_phys_segments ;
}
2009-09-26 18:19:21 +04:00
EXPORT_SYMBOL ( bio_phys_segments ) ;
2005-04-17 02:20:36 +04:00
/**
* __bio_clone - clone a bio
* @ bio : destination bio
* @ bio_src : bio to clone
*
* Clone a & bio . Caller will own the returned bio , but not
* the actual data it points to . Reference count of returned
* bio will be one .
*/
2006-01-15 00:20:43 +03:00
void __bio_clone ( struct bio * bio , struct bio * bio_src )
2005-04-17 02:20:36 +04:00
{
2005-08-07 20:42:12 +04:00
memcpy ( bio - > bi_io_vec , bio_src - > bi_io_vec ,
bio_src - > bi_max_vecs * sizeof ( struct bio_vec ) ) ;
2005-04-17 02:20:36 +04:00
2008-01-25 14:44:44 +03:00
/*
* most users will be overriding - > bi_bdev with a new target ,
* so we don ' t set nor calculate new physical / hw segment counts here
*/
2005-04-17 02:20:36 +04:00
bio - > bi_sector = bio_src - > bi_sector ;
bio - > bi_bdev = bio_src - > bi_bdev ;
bio - > bi_flags | = 1 < < BIO_CLONED ;
bio - > bi_rw = bio_src - > bi_rw ;
bio - > bi_vcnt = bio_src - > bi_vcnt ;
bio - > bi_size = bio_src - > bi_size ;
2005-07-28 12:07:18 +04:00
bio - > bi_idx = bio_src - > bi_idx ;
2005-04-17 02:20:36 +04:00
}
2009-09-26 18:19:21 +04:00
EXPORT_SYMBOL ( __bio_clone ) ;
2005-04-17 02:20:36 +04:00
/**
2012-09-07 02:35:02 +04:00
* bio_clone_bioset - clone a bio
2005-04-17 02:20:36 +04:00
* @ bio : bio to clone
* @ gfp_mask : allocation priority
2012-09-07 02:35:02 +04:00
* @ bs : bio_set to allocate from
2005-04-17 02:20:36 +04:00
*
* Like __bio_clone , only also allocates the returned bio
*/
2012-09-07 02:35:02 +04:00
struct bio * bio_clone_bioset ( struct bio * bio , gfp_t gfp_mask ,
struct bio_set * bs )
2005-04-17 02:20:36 +04:00
{
2012-09-07 02:35:02 +04:00
struct bio * b ;
2005-04-17 02:20:36 +04:00
2012-09-07 02:35:02 +04:00
b = bio_alloc_bioset ( gfp_mask , bio - > bi_max_vecs , bs ) ;
2008-06-30 22:04:41 +04:00
if ( ! b )
return NULL ;
__bio_clone ( b , bio ) ;
if ( bio_integrity ( bio ) ) {
int ret ;
2012-09-07 02:34:56 +04:00
ret = bio_integrity_clone ( b , bio , gfp_mask ) ;
2008-06-30 22:04:41 +04:00
2009-03-09 12:42:45 +03:00
if ( ret < 0 ) {
bio_put ( b ) ;
2008-06-30 22:04:41 +04:00
return NULL ;
2009-03-09 12:42:45 +03:00
}
2005-09-07 02:16:42 +04:00
}
2005-04-17 02:20:36 +04:00
return b ;
}
2012-09-07 02:35:02 +04:00
EXPORT_SYMBOL ( bio_clone_bioset ) ;
2005-04-17 02:20:36 +04:00
/**
* bio_get_nr_vecs - return approx number of vecs
* @ bdev : I / O target
*
* Return the approximate number of pages we can send to this target .
* There ' s no guarantee that you will be able to fit this number of pages
* into a bio , it does not account for dynamic restrictions that vary
* on offset .
*/
int bio_get_nr_vecs ( struct block_device * bdev )
{
2007-07-24 11:28:11 +04:00
struct request_queue * q = bdev_get_queue ( bdev ) ;
2012-05-11 18:36:44 +04:00
int nr_pages ;
nr_pages = min_t ( unsigned ,
2012-02-09 01:07:18 +04:00
queue_max_segments ( q ) ,
queue_max_sectors ( q ) / ( PAGE_SIZE > > 9 ) + 1 ) ;
2012-05-11 18:36:44 +04:00
return min_t ( unsigned , nr_pages , BIO_MAX_PAGES ) ;
2005-04-17 02:20:36 +04:00
}
2009-09-26 18:19:21 +04:00
EXPORT_SYMBOL ( bio_get_nr_vecs ) ;
2005-04-17 02:20:36 +04:00
2007-07-24 11:28:11 +04:00
static int __bio_add_page ( struct request_queue * q , struct bio * bio , struct page
2005-12-05 11:37:06 +03:00
* page , unsigned int len , unsigned int offset ,
unsigned short max_sectors )
2005-04-17 02:20:36 +04:00
{
int retried_segments = 0 ;
struct bio_vec * bvec ;
/*
* cloned bio must not modify vec list
*/
if ( unlikely ( bio_flagged ( bio , BIO_CLONED ) ) )
return 0 ;
2006-01-06 11:43:28 +03:00
if ( ( ( bio - > bi_size + len ) > > 9 ) > max_sectors )
2005-04-17 02:20:36 +04:00
return 0 ;
2006-01-06 11:43:28 +03:00
/*
* For filesystems with a blocksize smaller than the pagesize
* we will often be called with the same page as last time and
* a consecutive offset . Optimize this special case .
*/
if ( bio - > bi_vcnt > 0 ) {
struct bio_vec * prev = & bio - > bi_io_vec [ bio - > bi_vcnt - 1 ] ;
if ( page = = prev - > bv_page & &
offset = = prev - > bv_offset + prev - > bv_len ) {
2010-01-27 22:44:36 +03:00
unsigned int prev_bv_len = prev - > bv_len ;
2006-01-06 11:43:28 +03:00
prev - > bv_len + = len ;
2008-07-03 11:53:43 +04:00
if ( q - > merge_bvec_fn ) {
struct bvec_merge_data bvm = {
2010-01-27 22:44:36 +03:00
/* prev_bvec is already charged in
bi_size , discharge it in order to
simulate merging updated prev_bvec
as new bvec . */
2008-07-03 11:53:43 +04:00
. bi_bdev = bio - > bi_bdev ,
. bi_sector = bio - > bi_sector ,
2010-01-27 22:44:36 +03:00
. bi_size = bio - > bi_size - prev_bv_len ,
2008-07-03 11:53:43 +04:00
. bi_rw = bio - > bi_rw ,
} ;
2010-03-03 06:28:06 +03:00
if ( q - > merge_bvec_fn ( q , & bvm , prev ) < prev - > bv_len ) {
2008-07-03 11:53:43 +04:00
prev - > bv_len - = len ;
return 0 ;
}
2006-01-06 11:43:28 +03:00
}
goto done ;
}
}
if ( bio - > bi_vcnt > = bio - > bi_max_vecs )
2005-04-17 02:20:36 +04:00
return 0 ;
/*
* we might lose a segment or two here , but rather that than
* make this too complex .
*/
2010-02-26 08:20:39 +03:00
while ( bio - > bi_phys_segments > = queue_max_segments ( q ) ) {
2005-04-17 02:20:36 +04:00
if ( retried_segments )
return 0 ;
retried_segments = 1 ;
blk_recount_segments ( q , bio ) ;
}
/*
* setup the new entry , we might clear it again later if we
* cannot add the page
*/
bvec = & bio - > bi_io_vec [ bio - > bi_vcnt ] ;
bvec - > bv_page = page ;
bvec - > bv_len = len ;
bvec - > bv_offset = offset ;
/*
* if queue has other restrictions ( eg varying max sector size
* depending on offset ) , it can specify a merge_bvec_fn in the
* queue to get further control
*/
if ( q - > merge_bvec_fn ) {
2008-07-03 11:53:43 +04:00
struct bvec_merge_data bvm = {
. bi_bdev = bio - > bi_bdev ,
. bi_sector = bio - > bi_sector ,
. bi_size = bio - > bi_size ,
. bi_rw = bio - > bi_rw ,
} ;
2005-04-17 02:20:36 +04:00
/*
* merge_bvec_fn ( ) returns number of bytes it can accept
* at this offset
*/
2010-03-03 06:28:06 +03:00
if ( q - > merge_bvec_fn ( q , & bvm , bvec ) < bvec - > bv_len ) {
2005-04-17 02:20:36 +04:00
bvec - > bv_page = NULL ;
bvec - > bv_len = 0 ;
bvec - > bv_offset = 0 ;
return 0 ;
}
}
/* If we may be able to merge these biovecs, force a recount */
2008-08-15 12:15:19 +04:00
if ( bio - > bi_vcnt & & ( BIOVEC_PHYS_MERGEABLE ( bvec - 1 , bvec ) ) )
2005-04-17 02:20:36 +04:00
bio - > bi_flags & = ~ ( 1 < < BIO_SEG_VALID ) ;
bio - > bi_vcnt + + ;
bio - > bi_phys_segments + + ;
2006-01-06 11:43:28 +03:00
done :
2005-04-17 02:20:36 +04:00
bio - > bi_size + = len ;
return len ;
}
2005-11-11 14:30:27 +03:00
/**
 *	bio_add_pc_page	-	attempt to add page to bio
 *	@q: the target queue
 *	@bio: destination bio
 *	@page: page to add
 *	@len: vec entry length
 *	@offset: vec entry offset
 *
 *	Attempt to add a page to the bio_vec maplist, bounded by the hardware
 *	sector limit of @q.  This can fail for a number of reasons, such as
 *	the bio being full or target block device limitations.  The target
 *	block device must allow bio's up to PAGE_SIZE, so it is always
 *	possible to add a single page to an empty bio.
 *
 *	This should only be used by REQ_PC bios.
 */
int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page *page,
		    unsigned int len, unsigned int offset)
{
	unsigned int limit = queue_max_hw_sectors(q);

	return __bio_add_page(q, bio, page, len, offset, limit);
}
EXPORT_SYMBOL(bio_add_pc_page);
2005-11-11 14:30:27 +03:00
2005-04-17 02:20:36 +04:00
/**
* bio_add_page - attempt to add page to bio
* @ bio : destination bio
* @ page : page to add
* @ len : vec entry length
* @ offset : vec entry offset
*
* Attempt to add a page to the bio_vec maplist . This can fail for a
2011-05-27 16:52:09 +04:00
* number of reasons , such as the bio being full or target block device
* limitations . The target block device must allow bio ' s up to PAGE_SIZE ,
* so it is always possible to add a single page to an empty bio .
2005-04-17 02:20:36 +04:00
*/
int bio_add_page ( struct bio * bio , struct page * page , unsigned int len ,
unsigned int offset )
{
2005-12-05 11:37:06 +03:00
struct request_queue * q = bdev_get_queue ( bio - > bi_bdev ) ;
2009-05-23 01:17:50 +04:00
return __bio_add_page ( q , bio , page , len , offset , queue_max_sectors ( q ) ) ;
2005-04-17 02:20:36 +04:00
}
2009-09-26 18:19:21 +04:00
EXPORT_SYMBOL ( bio_add_page ) ;
2005-04-17 02:20:36 +04:00
/*
* Bookkeeping attached to a bio built by bio_copy_user_iov ( ) ( through
* bio - > bi_private ) so that bio_uncopy_user ( ) can copy data back to user
* space and release the pages once the IO has completed .
*/
struct bio_map_data {
/* snapshot of the bio ' s io_vec table , taken by bio_set_map_data ( ) */
struct bio_vec * iovecs ;
2008-04-11 14:56:49 +04:00
/* private copy of the caller ' s sg_iovec array */
struct sg_iovec * sgvecs ;
2008-08-28 11:17:06 +04:00
/* number of entries in sgvecs */
int nr_sgvecs ;
/* non - zero when the bio ' s pages were allocated here , not caller supplied */
int is_our_pages ;
2005-04-17 02:20:36 +04:00
} ;
2008-04-11 14:56:49 +04:00
static void bio_set_map_data ( struct bio_map_data * bmd , struct bio * bio ,
2008-08-28 11:17:06 +04:00
struct sg_iovec * iov , int iov_count ,
int is_our_pages )
2005-04-17 02:20:36 +04:00
{
memcpy ( bmd - > iovecs , bio - > bi_io_vec , sizeof ( struct bio_vec ) * bio - > bi_vcnt ) ;
2008-04-11 14:56:49 +04:00
memcpy ( bmd - > sgvecs , iov , sizeof ( struct sg_iovec ) * iov_count ) ;
bmd - > nr_sgvecs = iov_count ;
2008-08-28 11:17:06 +04:00
bmd - > is_our_pages = is_our_pages ;
2005-04-17 02:20:36 +04:00
bio - > bi_private = bmd ;
}
static void bio_free_map_data ( struct bio_map_data * bmd )
{
kfree ( bmd - > iovecs ) ;
2008-04-11 14:56:49 +04:00
kfree ( bmd - > sgvecs ) ;
2005-04-17 02:20:36 +04:00
kfree ( bmd ) ;
}
2011-11-16 12:21:50 +04:00
static struct bio_map_data * bio_alloc_map_data ( int nr_segs ,
unsigned int iov_count ,
2008-08-25 22:36:08 +04:00
gfp_t gfp_mask )
2005-04-17 02:20:36 +04:00
{
2010-10-29 21:46:56 +04:00
struct bio_map_data * bmd ;
if ( iov_count > UIO_MAXIOV )
return NULL ;
2005-04-17 02:20:36 +04:00
2010-10-29 21:46:56 +04:00
bmd = kmalloc ( sizeof ( * bmd ) , gfp_mask ) ;
2005-04-17 02:20:36 +04:00
if ( ! bmd )
return NULL ;
2008-08-25 22:36:08 +04:00
bmd - > iovecs = kmalloc ( sizeof ( struct bio_vec ) * nr_segs , gfp_mask ) ;
2008-04-11 14:56:49 +04:00
if ( ! bmd - > iovecs ) {
kfree ( bmd ) ;
return NULL ;
}
2008-08-25 22:36:08 +04:00
bmd - > sgvecs = kmalloc ( sizeof ( struct sg_iovec ) * iov_count , gfp_mask ) ;
2008-04-11 14:56:49 +04:00
if ( bmd - > sgvecs )
2005-04-17 02:20:36 +04:00
return bmd ;
2008-04-11 14:56:49 +04:00
kfree ( bmd - > iovecs ) ;
2005-04-17 02:20:36 +04:00
kfree ( bmd ) ;
return NULL ;
}
2008-08-25 22:36:08 +04:00
static int __bio_copy_iov ( struct bio * bio , struct bio_vec * iovecs ,
2009-07-09 16:46:53 +04:00
struct sg_iovec * iov , int iov_count ,
int to_user , int from_user , int do_free_page )
2008-04-11 14:56:49 +04:00
{
int ret = 0 , i ;
struct bio_vec * bvec ;
int iov_idx = 0 ;
unsigned int iov_off = 0 ;
__bio_for_each_segment ( bvec , bio , i , 0 ) {
char * bv_addr = page_address ( bvec - > bv_page ) ;
2008-08-25 22:36:08 +04:00
unsigned int bv_len = iovecs [ i ] . bv_len ;
2008-04-11 14:56:49 +04:00
while ( bv_len & & iov_idx < iov_count ) {
unsigned int bytes ;
2009-06-10 23:57:07 +04:00
char __user * iov_addr ;
2008-04-11 14:56:49 +04:00
bytes = min_t ( unsigned int ,
iov [ iov_idx ] . iov_len - iov_off , bv_len ) ;
iov_addr = iov [ iov_idx ] . iov_base + iov_off ;
if ( ! ret ) {
2009-07-09 16:46:53 +04:00
if ( to_user )
2008-04-11 14:56:49 +04:00
ret = copy_to_user ( iov_addr , bv_addr ,
bytes ) ;
2009-07-09 16:46:53 +04:00
if ( from_user )
ret = copy_from_user ( bv_addr , iov_addr ,
bytes ) ;
2008-04-11 14:56:49 +04:00
if ( ret )
ret = - EFAULT ;
}
bv_len - = bytes ;
bv_addr + = bytes ;
iov_addr + = bytes ;
iov_off + = bytes ;
if ( iov [ iov_idx ] . iov_len = = iov_off ) {
iov_idx + + ;
iov_off = 0 ;
}
}
2008-08-28 11:17:06 +04:00
if ( do_free_page )
2008-04-11 14:56:49 +04:00
__free_page ( bvec - > bv_page ) ;
}
return ret ;
}
2005-04-17 02:20:36 +04:00
/**
* bio_uncopy_user - finish previously mapped bio
* @ bio : bio being terminated
*
* Free pages allocated from bio_copy_user ( ) and write back data
* to user space in case of a read .
*/
int bio_uncopy_user ( struct bio * bio )
{
struct bio_map_data * bmd = bio - > bi_private ;
2008-09-02 11:20:19 +04:00
int ret = 0 ;
2005-04-17 02:20:36 +04:00
2008-09-02 11:20:19 +04:00
if ( ! bio_flagged ( bio , BIO_NULL_MAPPED ) )
ret = __bio_copy_iov ( bio , bmd - > iovecs , bmd - > sgvecs ,
2009-07-09 16:46:53 +04:00
bmd - > nr_sgvecs , bio_data_dir ( bio ) = = READ ,
0 , bmd - > is_our_pages ) ;
2005-04-17 02:20:36 +04:00
bio_free_map_data ( bmd ) ;
bio_put ( bio ) ;
return ret ;
}
2009-09-26 18:19:21 +04:00
EXPORT_SYMBOL ( bio_uncopy_user ) ;
2005-04-17 02:20:36 +04:00
/**
2008-04-11 14:56:49 +04:00
* bio_copy_user_iov - copy user data to bio
2005-04-17 02:20:36 +04:00
* @ q : destination block queue
2008-08-28 11:17:06 +04:00
* @ map_data : pointer to the rq_map_data holding pages ( if necessary )
2008-04-11 14:56:49 +04:00
* @ iov : the iovec .
* @ iov_count : number of elements in the iovec
2005-04-17 02:20:36 +04:00
* @ write_to_vm : bool indicating writing to pages or not
2008-08-28 11:17:05 +04:00
* @ gfp_mask : memory allocation flags
2005-04-17 02:20:36 +04:00
*
* Prepares and returns a bio for indirect user io , bouncing data
* to / from kernel pages as necessary . Must be paired with
* call bio_uncopy_user ( ) on io completion .
*/
2008-08-28 11:17:06 +04:00
struct bio *bio_copy_user_iov(struct request_queue *q,
			      struct rq_map_data *map_data,
			      struct sg_iovec *iov, int iov_count,
			      int write_to_vm, gfp_t gfp_mask)
{
	struct bio_map_data *bmd;
	struct bio_vec *bvec;
	struct page *page;
	struct bio *bio;
	int i, ret;
	int nr_pages = 0;
	unsigned int len = 0;
	/* Only the sub-page part of map_data->offset applies to the first page. */
	unsigned int offset = map_data ? map_data->offset & ~PAGE_MASK : 0;

	/* Count the pages spanned by every iovec and the total byte length. */
	for (i = 0; i < iov_count; i++) {
		unsigned long uaddr;
		unsigned long end;
		unsigned long start;

		uaddr = (unsigned long)iov[i].iov_base;
		end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
		start = uaddr >> PAGE_SHIFT;

		/*
		 * Overflow, abort
		 */
		if (end < start)
			return ERR_PTR(-EINVAL);

		nr_pages += end - start;
		len += iov[i].iov_len;
	}

	/* A non-zero starting offset can push the data into one more page. */
	if (offset)
		nr_pages++;

	bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask);
	if (!bmd)
		return ERR_PTR(-ENOMEM);

	ret = -ENOMEM;
	bio = bio_kmalloc(gfp_mask, nr_pages);
	if (!bio)
		goto out_bmd;

	if (!write_to_vm)
		bio->bi_rw |= REQ_WRITE;

	ret = 0;

	if (map_data) {
		/*
		 * From here on nr_pages is the number of pages per map_data
		 * entry and i is the running page index into map_data.
		 */
		nr_pages = 1 << map_data->page_order;
		i = map_data->offset / PAGE_SIZE;
	}

	while (len) {
		unsigned int bytes = PAGE_SIZE;

		bytes -= offset;

		if (bytes > len)
			bytes = len;

		if (map_data) {
			/* Caller-supplied pages are exhausted. */
			if (i == map_data->nr_entries * nr_pages) {
				ret = -ENOMEM;
				break;
			}

			page = map_data->pages[i / nr_pages];
			page += (i % nr_pages);

			i++;
		} else {
			page = alloc_page(q->bounce_gfp | gfp_mask);
			if (!page) {
				ret = -ENOMEM;
				break;
			}
		}

		if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) {
			/*
			 * The page was not fully added, so no later
			 * bio-segment walk is guaranteed to free it. If we
			 * allocated it ourselves, free it here instead of
			 * leaking it. ret is left at 0, as before.
			 */
			if (!map_data)
				__free_page(page);
			break;
		}

		len -= bytes;
		offset = 0;
	}

	if (ret)
		goto cleanup;

	/*
	 * success
	 */
	if ((!write_to_vm && (!map_data || !map_data->null_mapped)) ||
	    (map_data && map_data->from_user)) {
		ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 1, 0);
		if (ret)
			goto cleanup;
	}

	bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1);
	return bio;

cleanup:
	/* Pages that came from map_data belong to the caller; leave them. */
	if (!map_data)
		bio_for_each_segment(bvec, bio, i)
			__free_page(bvec->bv_page);

	bio_put(bio);
out_bmd:
	bio_free_map_data(bmd);
	return ERR_PTR(ret);
}
2008-04-11 14:56:49 +04:00
/**
* bio_copy_user - copy user data to bio
* @ q : destination block queue
2008-08-28 11:17:06 +04:00
* @ map_data : pointer to the rq_map_data holding pages ( if necessary )
2008-04-11 14:56:49 +04:00
* @ uaddr : start of user address
* @ len : length in bytes
* @ write_to_vm : bool indicating writing to pages or not
2008-08-28 11:17:05 +04:00
* @ gfp_mask : memory allocation flags
2008-04-11 14:56:49 +04:00
*
* Prepares and returns a bio for indirect user io , bouncing data
* to / from kernel pages as necessary . Must be paired with
* call bio_uncopy_user ( ) on io completion .
*/
2008-08-28 11:17:06 +04:00
struct bio * bio_copy_user ( struct request_queue * q , struct rq_map_data * map_data ,
unsigned long uaddr , unsigned int len ,
int write_to_vm , gfp_t gfp_mask )
2008-04-11 14:56:49 +04:00
{
struct sg_iovec iov ;
iov . iov_base = ( void __user * ) uaddr ;
iov . iov_len = len ;
2008-08-28 11:17:06 +04:00
return bio_copy_user_iov ( q , map_data , & iov , 1 , write_to_vm , gfp_mask ) ;
2008-04-11 14:56:49 +04:00
}
2009-09-26 18:19:21 +04:00
EXPORT_SYMBOL ( bio_copy_user ) ;
2008-04-11 14:56:49 +04:00
2007-07-24 11:28:11 +04:00
/*
 * Pin the user pages described by @iov and add them to a newly allocated
 * bio. Returns the bio, or an ERR_PTR() on failure: -EINVAL for an
 * overflowing, misaligned or empty request, -ENOMEM when an allocation
 * fails, -EFAULT when not every page of an iovec could be pinned.
 */
static struct bio *__bio_map_user_iov(struct request_queue *q,
				      struct block_device *bdev,
				      struct sg_iovec *iov, int iov_count,
				      int write_to_vm, gfp_t gfp_mask)
{
	int i, j;
	int nr_pages = 0;
	struct page **pages;
	struct bio *bio;
	int cur_page = 0;
	int ret, offset;

	/* First pass: validate every iovec and count the pages it spans. */
	for (i = 0; i < iov_count; i++) {
		unsigned long uaddr = (unsigned long)iov[i].iov_base;
		unsigned long len = iov[i].iov_len;
		unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
		unsigned long start = uaddr >> PAGE_SHIFT;

		/*
		 * Overflow, abort
		 */
		if (end < start)
			return ERR_PTR(-EINVAL);

		nr_pages += end - start;
		/*
		 * buffer must be aligned to at least hardsector size for now
		 */
		if (uaddr & queue_dma_alignment(q))
			return ERR_PTR(-EINVAL);
	}

	if (!nr_pages)
		return ERR_PTR(-EINVAL);

	bio = bio_kmalloc(gfp_mask, nr_pages);
	if (!bio)
		return ERR_PTR(-ENOMEM);

	ret = -ENOMEM;
	/* Zeroed on purpose: out_unmap stops at the first NULL entry. */
	pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask);
	if (!pages)
		goto out;

	/* Second pass: pin each iovec's pages and add them to the bio. */
	for (i = 0; i < iov_count; i++) {
		unsigned long uaddr = (unsigned long)iov[i].iov_base;
		unsigned long len = iov[i].iov_len;
		unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
		unsigned long start = uaddr >> PAGE_SHIFT;
		const int local_nr_pages = end - start;
		const int page_limit = cur_page + local_nr_pages;

		ret = get_user_pages_fast(uaddr, local_nr_pages,
					  write_to_vm, &pages[cur_page]);
		if (ret < local_nr_pages) {
			ret = -EFAULT;
			goto out_unmap;
		}

		offset = uaddr & ~PAGE_MASK;
		for (j = cur_page; j < page_limit; j++) {
			unsigned int bytes = PAGE_SIZE - offset;

			if (!len)
				break;

			if (bytes > len)
				bytes = len;

			/*
			 * sorry...
			 */
			if (bio_add_pc_page(q, bio, pages[j], bytes, offset) <
					    bytes)
				break;

			len -= bytes;
			offset = 0;
		}

		cur_page = j;
		/*
		 * release the pages we didn't map into the bio, if any.
		 * Clear each slot as it is dropped: out_unmap walks pages[]
		 * up to the first NULL, and a stale pointer left here would
		 * be released a second time if a later iovec fails.
		 */
		while (j < page_limit) {
			page_cache_release(pages[j]);
			pages[j++] = NULL;
		}
	}

	kfree(pages);

	/*
	 * set data direction, and check if mapped pages need bouncing
	 */
	if (!write_to_vm)
		bio->bi_rw |= REQ_WRITE;

	bio->bi_bdev = bdev;
	bio->bi_flags |= (1 << BIO_USER_MAPPED);
	return bio;

 out_unmap:
	/* Drop the reference on every page that is still pinned. */
	for (i = 0; i < nr_pages; i++) {
		if (!pages[i])
			break;
		page_cache_release(pages[i]);
	}
 out:
	kfree(pages);
	bio_put(bio);
	return ERR_PTR(ret);
}
/**
* bio_map_user - map user address into bio
2007-07-24 11:28:11 +04:00
* @ q : the struct request_queue for the bio
2005-04-17 02:20:36 +04:00
* @ bdev : destination block device
* @ uaddr : start of user address
* @ len : length in bytes
* @ write_to_vm : bool indicating writing to pages or not
2008-08-28 11:17:05 +04:00
* @ gfp_mask : memory allocation flags
2005-04-17 02:20:36 +04:00
*
* Map the user space address into a bio suitable for io to a block
* device . Returns an error pointer in case of error .
*/
2007-07-24 11:28:11 +04:00
struct bio * bio_map_user ( struct request_queue * q , struct block_device * bdev ,
2008-08-28 11:17:05 +04:00
unsigned long uaddr , unsigned int len , int write_to_vm ,
gfp_t gfp_mask )
2005-06-20 16:06:52 +04:00
{
struct sg_iovec iov ;
2005-09-09 19:53:56 +04:00
iov . iov_base = ( void __user * ) uaddr ;
2005-06-20 16:06:52 +04:00
iov . iov_len = len ;
2008-08-28 11:17:05 +04:00
return bio_map_user_iov ( q , bdev , & iov , 1 , write_to_vm , gfp_mask ) ;
2005-06-20 16:06:52 +04:00
}
2009-09-26 18:19:21 +04:00
EXPORT_SYMBOL ( bio_map_user ) ;
2005-06-20 16:06:52 +04:00
/**
* bio_map_user_iov - map user sg_iovec table into bio
2007-07-24 11:28:11 +04:00
* @ q : the struct request_queue for the bio
2005-06-20 16:06:52 +04:00
* @ bdev : destination block device
* @ iov : the iovec .
* @ iov_count : number of elements in the iovec
* @ write_to_vm : bool indicating writing to pages or not
2008-08-28 11:17:05 +04:00
* @ gfp_mask : memory allocation flags
2005-06-20 16:06:52 +04:00
*
* Map the user space address into a bio suitable for io to a block
* device . Returns an error pointer in case of error .
*/
2007-07-24 11:28:11 +04:00
struct bio * bio_map_user_iov ( struct request_queue * q , struct block_device * bdev ,
2005-06-20 16:06:52 +04:00
struct sg_iovec * iov , int iov_count ,
2008-08-28 11:17:05 +04:00
int write_to_vm , gfp_t gfp_mask )
2005-04-17 02:20:36 +04:00
{
struct bio * bio ;
2008-08-28 11:17:05 +04:00
bio = __bio_map_user_iov ( q , bdev , iov , iov_count , write_to_vm ,
gfp_mask ) ;
2005-04-17 02:20:36 +04:00
if ( IS_ERR ( bio ) )
return bio ;
/*
* subtle - - if __bio_map_user ( ) ended up bouncing a bio ,
* it would normally disappear when its bi_end_io is run .
* however , we need it for the unmap , so grab an extra
* reference to it
*/
bio_get ( bio ) ;
2006-12-01 12:40:55 +03:00
return bio ;
2005-04-17 02:20:36 +04:00
}
static void __bio_unmap_user ( struct bio * bio )
{
struct bio_vec * bvec ;
int i ;
/*
* make sure we dirty pages we wrote to
*/
__bio_for_each_segment ( bvec , bio , i , 0 ) {
if ( bio_data_dir ( bio ) = = READ )
set_page_dirty_lock ( bvec - > bv_page ) ;
page_cache_release ( bvec - > bv_page ) ;
}
bio_put ( bio ) ;
}
/**
 *	bio_unmap_user	-	unmap a bio
 *	@bio:		the bio being unmapped
 *
 *	Unmap a bio previously mapped by bio_map_user(). Must be called with
 *	a process context.
 *
 *	bio_unmap_user() may sleep.
 */
void bio_unmap_user(struct bio *bio)
{
	/* Releases the pages and drops one bio reference ... */
	__bio_unmap_user(bio);
	/* ... and this drops a second reference on the bio. */
	bio_put(bio);
}
EXPORT_SYMBOL(bio_unmap_user);
2005-04-17 02:20:36 +04:00
2007-09-27 14:47:43 +04:00
/*
 * Completion handler installed by __bio_map_kern(): ignores @err and only
 * drops a reference on @bio.
 */
static void bio_map_kern_endio(struct bio *bio, int err)
{
	bio_put(bio);
}
2007-07-24 11:28:11 +04:00
/*
 * Build a bio over the kernel buffer [@data, @data + @len), one page-sized
 * piece at a time. Stops early if bio_add_pc_page() accepts less than asked
 * for, so the returned bio may cover less than @len bytes. Returns
 * ERR_PTR(-ENOMEM) if the bio cannot be allocated.
 */
static struct bio *__bio_map_kern(struct request_queue *q, void *data,
				  unsigned int len, gfp_t gfp_mask)
{
	unsigned long kaddr = (unsigned long)data;
	unsigned long first_pfn = kaddr >> PAGE_SHIFT;
	unsigned long last_pfn = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	const int nr_pages = last_pfn - first_pfn;
	struct bio *bio;
	int offset;
	int n;

	bio = bio_kmalloc(gfp_mask, nr_pages);
	if (!bio)
		return ERR_PTR(-ENOMEM);

	/* Only the first piece can start in the middle of a page. */
	offset = offset_in_page(kaddr);

	for (n = 0; n < nr_pages && len; n++) {
		unsigned int bytes = PAGE_SIZE - offset;

		if (bytes > len)
			bytes = len;

		if (bio_add_pc_page(q, bio, virt_to_page(data), bytes,
				    offset) < bytes)
			break;

		data += bytes;
		len -= bytes;
		offset = 0;
	}

	bio->bi_end_io = bio_map_kern_endio;
	return bio;
}
/**
* bio_map_kern - map kernel address into bio
2007-07-24 11:28:11 +04:00
* @ q : the struct request_queue for the bio
2005-06-20 16:04:44 +04:00
* @ data : pointer to buffer to map
* @ len : length in bytes
* @ gfp_mask : allocation flags for bio allocation
*
* Map the kernel address into a bio suitable for io to a block
* device . Returns an error pointer in case of error .
*/
2007-07-24 11:28:11 +04:00
struct bio * bio_map_kern ( struct request_queue * q , void * data , unsigned int len ,
2005-10-21 11:20:48 +04:00
gfp_t gfp_mask )
2005-06-20 16:04:44 +04:00
{
struct bio * bio ;
bio = __bio_map_kern ( q , data , len , gfp_mask ) ;
if ( IS_ERR ( bio ) )
return bio ;
if ( bio - > bi_size = = len )
return bio ;
/*
* Don ' t support partial mappings .
*/
bio_put ( bio ) ;
return ERR_PTR ( - EINVAL ) ;
}
2009-09-26 18:19:21 +04:00
EXPORT_SYMBOL ( bio_map_kern ) ;
2005-06-20 16:04:44 +04:00
2008-04-25 14:47:50 +04:00
static void bio_copy_kern_endio ( struct bio * bio , int err )
{
struct bio_vec * bvec ;
const int read = bio_data_dir ( bio ) = = READ ;
2008-08-25 22:36:08 +04:00
struct bio_map_data * bmd = bio - > bi_private ;
2008-04-25 14:47:50 +04:00
int i ;
2008-08-25 22:36:08 +04:00
char * p = bmd - > sgvecs [ 0 ] . iov_base ;
2008-04-25 14:47:50 +04:00
__bio_for_each_segment ( bvec , bio , i , 0 ) {
char * addr = page_address ( bvec - > bv_page ) ;
2008-08-25 22:36:08 +04:00
int len = bmd - > iovecs [ i ] . bv_len ;
2008-04-25 14:47:50 +04:00
2009-05-19 13:33:06 +04:00
if ( read )
2008-08-25 22:36:08 +04:00
memcpy ( p , addr , len ) ;
2008-04-25 14:47:50 +04:00
__free_page ( bvec - > bv_page ) ;
2008-08-25 22:36:08 +04:00
p + = len ;
2008-04-25 14:47:50 +04:00
}
2008-08-25 22:36:08 +04:00
bio_free_map_data ( bmd ) ;
2008-04-25 14:47:50 +04:00
bio_put ( bio ) ;
}
/**
 *	bio_copy_kern	-	copy kernel address into bio
 *	@q: the struct request_queue for the bio
 *	@data: pointer to buffer to copy
 *	@len: length in bytes
 *	@gfp_mask: allocation flags for bio and page allocation
 *	@reading: data direction is READ
 *
 *	copy the kernel address into a bio suitable for io to a block
 *	device. Returns an error pointer in case of error.
 */
struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
			  gfp_t gfp_mask, int reading)
{
	struct bio_vec *seg;
	struct bio *bio;
	int n;

	/* Allocate the bio and its pages via bio_copy_user(), without map_data. */
	bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask);
	if (IS_ERR(bio))
		return bio;

	if (!reading) {
		/* Not a read: fill the bio's pages from the caller's buffer. */
		void *src = data;

		bio_for_each_segment(seg, bio, n) {
			memcpy(page_address(seg->bv_page), src, seg->bv_len);
			src += seg->bv_len;
		}
	}

	bio->bi_end_io = bio_copy_kern_endio;

	return bio;
}
EXPORT_SYMBOL(bio_copy_kern);
2008-04-25 14:47:50 +04:00
2005-04-17 02:20:36 +04:00
/*
* bio_set_pages_dirty ( ) and bio_check_pages_dirty ( ) are support functions
* for performing direct - IO in BIOs .
*
* The problem is that we cannot run set_page_dirty ( ) from interrupt context
* because the required locks are not interrupt - safe . So what we can do is to
* mark the pages dirty _before_ performing IO . And in interrupt context ,
* check that the pages are still dirty . If so , fine . If not , redirty them
* in process context .
*
* We special - case compound pages here : normally this means reads into hugetlb
* pages . The logic in here doesn ' t really work right for compound pages
* because the VM does not uniformly chase down the head page in all cases .
* But dirtiness of compound pages is pretty meaningless anyway : the VM doesn ' t
* handle them at all . So we skip compound pages here at an early stage .
*
* Note that this code is very hard to test under normal circumstances because
* direct - io pins the pages with get_user_pages ( ) . This makes
* is_page_cache_freeable return false , and the VM will not clean the pages .
2012-07-25 19:12:08 +04:00
* But other code ( eg , flusher threads ) could clean the pages if they are mapped
2005-04-17 02:20:36 +04:00
* pagecache .
*
* Simply disabling the call to bio_set_pages_dirty ( ) is a good way to test the
* deferred bio dirtying paths .
*/
/*
* bio_set_pages_dirty ( ) will mark all the bio ' s pages as dirty .
*/
void bio_set_pages_dirty ( struct bio * bio )
{
struct bio_vec * bvec = bio - > bi_io_vec ;
int i ;
for ( i = 0 ; i < bio - > bi_vcnt ; i + + ) {
struct page * page = bvec [ i ] . bv_page ;
if ( page & & ! PageCompound ( page ) )
set_page_dirty_lock ( page ) ;
}
}
2008-02-18 15:48:32 +03:00
static void bio_release_pages ( struct bio * bio )
2005-04-17 02:20:36 +04:00
{
struct bio_vec * bvec = bio - > bi_io_vec ;
int i ;
for ( i = 0 ; i < bio - > bi_vcnt ; i + + ) {
struct page * page = bvec [ i ] . bv_page ;
if ( page )
put_page ( page ) ;
}
}
/*
* bio_check_pages_dirty ( ) will check that all the BIO ' s pages are still dirty .
* If they are , then fine . If , however , some pages are clean then they must
* have been written out during the direct - IO read . So we take another ref on
* the BIO and the offending pages and re - dirty the pages in process context .
*
* It is expected that bio_check_pages_dirty ( ) will wholly own the BIO from
* here on . It will run one page_cache_release ( ) against each page and will
* run one bio_put ( ) against the BIO .
*/
2006-11-22 17:55:48 +03:00
static void bio_dirty_fn ( struct work_struct * work ) ;
2005-04-17 02:20:36 +04:00
2006-11-22 17:55:48 +03:00
static DECLARE_WORK ( bio_dirty_work , bio_dirty_fn ) ;
2005-04-17 02:20:36 +04:00
static DEFINE_SPINLOCK ( bio_dirty_lock ) ;
static struct bio * bio_dirty_list ;
/*
* This runs in process context
*/
2006-11-22 17:55:48 +03:00
static void bio_dirty_fn ( struct work_struct * work )
2005-04-17 02:20:36 +04:00
{
unsigned long flags ;
struct bio * bio ;
spin_lock_irqsave ( & bio_dirty_lock , flags ) ;
bio = bio_dirty_list ;
bio_dirty_list = NULL ;
spin_unlock_irqrestore ( & bio_dirty_lock , flags ) ;
while ( bio ) {
struct bio * next = bio - > bi_private ;
bio_set_pages_dirty ( bio ) ;
bio_release_pages ( bio ) ;
bio_put ( bio ) ;
bio = next ;
}
}
void bio_check_pages_dirty ( struct bio * bio )
{
struct bio_vec * bvec = bio - > bi_io_vec ;
int nr_clean_pages = 0 ;
int i ;
for ( i = 0 ; i < bio - > bi_vcnt ; i + + ) {
struct page * page = bvec [ i ] . bv_page ;
if ( PageDirty ( page ) | | PageCompound ( page ) ) {
page_cache_release ( page ) ;
bvec [ i ] . bv_page = NULL ;
} else {
nr_clean_pages + + ;
}
}
if ( nr_clean_pages ) {
unsigned long flags ;
spin_lock_irqsave ( & bio_dirty_lock , flags ) ;
bio - > bi_private = bio_dirty_list ;
bio_dirty_list = bio ;
spin_unlock_irqrestore ( & bio_dirty_lock , flags ) ;
schedule_work ( & bio_dirty_work ) ;
} else {
bio_put ( bio ) ;
}
}
2009-11-26 11:16:19 +03:00
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
/*
 * Call flush_dcache_page() on the page of every segment visited by
 * bio_for_each_segment() for @bi.
 */
void bio_flush_dcache_pages(struct bio *bi)
{
	struct bio_vec *seg;
	int n;

	bio_for_each_segment(seg, bi, n) {
		flush_dcache_page(seg->bv_page);
	}
}
EXPORT_SYMBOL(bio_flush_dcache_pages);
#endif
2005-04-17 02:20:36 +04:00
/**
* bio_endio - end I / O on a bio
* @ bio : bio
* @ error : error , if any
*
* Description :
2007-09-27 14:47:43 +04:00
* bio_endio ( ) will end I / O on the whole bio . bio_endio ( ) is the
2007-09-27 14:46:13 +04:00
* preferred way to end I / O on a bio , it takes care of clearing
* BIO_UPTODATE on error . @ error is 0 on success , and and one of the
* established - Exxxx ( - EIO , for instance ) error values in case
2011-03-31 05:57:33 +04:00
* something went wrong . No one should call bi_end_io ( ) directly on a
2007-09-27 14:46:13 +04:00
* bio unless they own it and thus know that it has an end_io
* function .
2005-04-17 02:20:36 +04:00
* */
2007-09-27 14:47:43 +04:00
void bio_endio ( struct bio * bio , int error )
2005-04-17 02:20:36 +04:00
{
if ( error )
clear_bit ( BIO_UPTODATE , & bio - > bi_flags ) ;
2007-09-27 14:46:12 +04:00
else if ( ! test_bit ( BIO_UPTODATE , & bio - > bi_flags ) )
error = - EIO ;
2005-04-17 02:20:36 +04:00
2013-01-12 01:06:33 +04:00
trace_block_bio_complete ( bio , error ) ;
2007-09-27 14:46:13 +04:00
if ( bio - > bi_end_io )
2007-09-27 14:47:43 +04:00
bio - > bi_end_io ( bio , error ) ;
2005-04-17 02:20:36 +04:00
}
2009-09-26 18:19:21 +04:00
EXPORT_SYMBOL ( bio_endio ) ;
2005-04-17 02:20:36 +04:00
void bio_pair_release ( struct bio_pair * bp )
{
if ( atomic_dec_and_test ( & bp - > cnt ) ) {
struct bio * master = bp - > bio1 . bi_private ;
2007-09-27 14:47:43 +04:00
bio_endio ( master , bp - > error ) ;
2005-04-17 02:20:36 +04:00
mempool_free ( bp , bp - > bio2 . bi_private ) ;
}
}
2009-09-26 18:19:21 +04:00
EXPORT_SYMBOL ( bio_pair_release ) ;
2005-04-17 02:20:36 +04:00
2007-09-27 14:47:43 +04:00
static void bio_pair_end_1 ( struct bio * bi , int err )
2005-04-17 02:20:36 +04:00
{
struct bio_pair * bp = container_of ( bi , struct bio_pair , bio1 ) ;
if ( err )
bp - > error = err ;
bio_pair_release ( bp ) ;
}
2007-09-27 14:47:43 +04:00
static void bio_pair_end_2 ( struct bio * bi , int err )
2005-04-17 02:20:36 +04:00
{
struct bio_pair * bp = container_of ( bi , struct bio_pair , bio2 ) ;
if ( err )
bp - > error = err ;
bio_pair_release ( bp ) ;
}
/*
2009-01-26 04:36:14 +03:00
* split a bio - only worry about a bio with a single page in its iovec
2005-04-17 02:20:36 +04:00
*/
2008-10-09 10:57:05 +04:00
struct bio_pair * bio_split ( struct bio * bi , int first_sectors )
2005-04-17 02:20:36 +04:00
{
2008-10-09 10:57:05 +04:00
struct bio_pair * bp = mempool_alloc ( bio_split_pool , GFP_NOIO ) ;
2005-04-17 02:20:36 +04:00
if ( ! bp )
return bp ;
2008-10-30 10:34:33 +03:00
trace_block_split ( bdev_get_queue ( bi - > bi_bdev ) , bi ,
2006-03-23 22:00:26 +03:00
bi - > bi_sector + first_sectors ) ;
2012-09-28 12:38:48 +04:00
BUG_ON ( bi - > bi_vcnt ! = 1 & & bi - > bi_vcnt ! = 0 ) ;
2005-04-17 02:20:36 +04:00
BUG_ON ( bi - > bi_idx ! = 0 ) ;
atomic_set ( & bp - > cnt , 3 ) ;
bp - > error = 0 ;
bp - > bio1 = * bi ;
bp - > bio2 = * bi ;
bp - > bio2 . bi_sector + = first_sectors ;
bp - > bio2 . bi_size - = first_sectors < < 9 ;
bp - > bio1 . bi_size = first_sectors < < 9 ;
2012-09-28 12:38:48 +04:00
if ( bi - > bi_vcnt ! = 0 ) {
bp - > bv1 = bi - > bi_io_vec [ 0 ] ;
bp - > bv2 = bi - > bi_io_vec [ 0 ] ;
2012-09-18 20:19:27 +04:00
2012-09-28 12:38:48 +04:00
if ( bio_is_rw ( bi ) ) {
bp - > bv2 . bv_offset + = first_sectors < < 9 ;
bp - > bv2 . bv_len - = first_sectors < < 9 ;
bp - > bv1 . bv_len = first_sectors < < 9 ;
}
2005-04-17 02:20:36 +04:00
2012-09-28 12:38:48 +04:00
bp - > bio1 . bi_io_vec = & bp - > bv1 ;
bp - > bio2 . bi_io_vec = & bp - > bv2 ;
2005-04-17 02:20:36 +04:00
2012-09-28 12:38:48 +04:00
bp - > bio1 . bi_max_vecs = 1 ;
bp - > bio2 . bi_max_vecs = 1 ;
}
2006-05-23 09:35:27 +04:00
2005-04-17 02:20:36 +04:00
bp - > bio1 . bi_end_io = bio_pair_end_1 ;
bp - > bio2 . bi_end_io = bio_pair_end_2 ;
bp - > bio1 . bi_private = bi ;
2008-10-09 10:57:05 +04:00
bp - > bio2 . bi_private = bio_split_pool ;
2005-04-17 02:20:36 +04:00
2008-06-30 22:04:41 +04:00
if ( bio_integrity ( bi ) )
bio_integrity_split ( bi , bp , first_sectors ) ;
2005-04-17 02:20:36 +04:00
return bp ;
}
2009-09-26 18:19:21 +04:00
EXPORT_SYMBOL ( bio_split ) ;
2005-04-17 02:20:36 +04:00
2008-10-02 06:42:53 +04:00
/**
* bio_sector_offset - Find hardware sector offset in bio
* @ bio : bio to inspect
* @ index : bio_vec index
* @ offset : offset in bv_page
*
* Return the number of hardware sectors between beginning of bio
* and an end point indicated by a bio_vec index and an offset
* within that vector ' s page .
*/
sector_t bio_sector_offset ( struct bio * bio , unsigned short index ,
unsigned int offset )
{
2009-05-23 01:17:49 +04:00
unsigned int sector_sz ;
2008-10-02 06:42:53 +04:00
struct bio_vec * bv ;
sector_t sectors ;
int i ;
2009-05-23 01:17:49 +04:00
sector_sz = queue_logical_block_size ( bio - > bi_bdev - > bd_disk - > queue ) ;
2008-10-02 06:42:53 +04:00
sectors = 0 ;
if ( index > = bio - > bi_idx )
index = bio - > bi_vcnt - 1 ;
__bio_for_each_segment ( bv , bio , i , 0 ) {
if ( i = = index ) {
if ( offset > bv - > bv_offset )
sectors + = ( offset - bv - > bv_offset ) / sector_sz ;
break ;
}
sectors + = bv - > bv_len / sector_sz ;
}
return sectors ;
}
EXPORT_SYMBOL ( bio_sector_offset ) ;
2005-04-17 02:20:36 +04:00
/*
* create memory pools for biovec ' s in a bio_set .
* use the global biovec slabs created for general use .
*/
2007-04-02 12:06:42 +04:00
static int biovec_create_pools ( struct bio_set * bs , int pool_entries )
2005-04-17 02:20:36 +04:00
{
2008-12-11 13:53:43 +03:00
struct biovec_slab * bp = bvec_slabs + BIOVEC_MAX_IDX ;
2005-04-17 02:20:36 +04:00
2008-12-11 13:53:43 +03:00
bs - > bvec_pool = mempool_create_slab_pool ( pool_entries , bp - > slab ) ;
if ( ! bs - > bvec_pool )
return - ENOMEM ;
2005-04-17 02:20:36 +04:00
return 0 ;
}
static void biovec_free_pools ( struct bio_set * bs )
{
2008-12-11 13:53:43 +03:00
mempool_destroy ( bs - > bvec_pool ) ;
2005-04-17 02:20:36 +04:00
}
void bioset_free ( struct bio_set * bs )
{
if ( bs - > bio_pool )
mempool_destroy ( bs - > bio_pool ) ;
2009-06-26 17:37:49 +04:00
bioset_integrity_free ( bs ) ;
2005-04-17 02:20:36 +04:00
biovec_free_pools ( bs ) ;
2008-12-10 17:35:05 +03:00
bio_put_slab ( bs ) ;
2005-04-17 02:20:36 +04:00
kfree ( bs ) ;
}
2009-09-26 18:19:21 +04:00
EXPORT_SYMBOL ( bioset_free ) ;
2005-04-17 02:20:36 +04:00
2008-12-10 17:35:05 +03:00
/**
* bioset_create - Create a bio_set
* @ pool_size : Number of bio and bio_vecs to cache in the mempool
* @ front_pad : Number of bytes to allocate in front of the returned bio
*
* Description :
* Set up a bio_set to be used with @ bio_alloc_bioset . Allows the caller
* to ask for a number of bytes to be allocated in front of the bio .
* Front pad allocation is useful for embedding the bio inside
* another structure , to avoid allocating extra data to go with the bio .
* Note that the bio must be embedded at the END of that structure always ,
* or things will break badly .
*/
struct bio_set * bioset_create ( unsigned int pool_size , unsigned int front_pad )
2005-04-17 02:20:36 +04:00
{
2008-12-23 14:42:54 +03:00
unsigned int back_pad = BIO_INLINE_VECS * sizeof ( struct bio_vec ) ;
2008-10-22 22:32:58 +04:00
struct bio_set * bs ;
2005-04-17 02:20:36 +04:00
2008-10-22 22:32:58 +04:00
bs = kzalloc ( sizeof ( * bs ) , GFP_KERNEL ) ;
2005-04-17 02:20:36 +04:00
if ( ! bs )
return NULL ;
2008-12-10 17:35:05 +03:00
bs - > front_pad = front_pad ;
2008-10-22 22:32:58 +04:00
2008-12-23 14:42:54 +03:00
bs - > bio_slab = bio_find_or_create_slab ( front_pad + back_pad ) ;
2008-12-10 17:35:05 +03:00
if ( ! bs - > bio_slab ) {
kfree ( bs ) ;
return NULL ;
}
bs - > bio_pool = mempool_create_slab_pool ( pool_size , bs - > bio_slab ) ;
2005-04-17 02:20:36 +04:00
if ( ! bs - > bio_pool )
goto bad ;
2008-12-10 17:35:05 +03:00
if ( ! biovec_create_pools ( bs , pool_size ) )
2005-04-17 02:20:36 +04:00
return bs ;
bad :
bioset_free ( bs ) ;
return NULL ;
}
2009-09-26 18:19:21 +04:00
EXPORT_SYMBOL ( bioset_create ) ;
2005-04-17 02:20:36 +04:00
2012-03-06 01:15:27 +04:00
# ifdef CONFIG_BLK_CGROUP
/**
* bio_associate_current - associate a bio with % current
* @ bio : target bio
*
* Associate @ bio with % current if it hasn ' t been associated yet . Block
* layer will treat @ bio as if it were issued by % current no matter which
* task actually issues it .
*
* This function takes an extra reference of @ task ' s io_context and blkcg
* which will be put when @ bio is released . The caller must own @ bio ,
* ensure % current - > io_context exists , and is responsible for synchronizing
* calls to this function .
*/
int bio_associate_current ( struct bio * bio )
{
struct io_context * ioc ;
struct cgroup_subsys_state * css ;
if ( bio - > bi_ioc )
return - EBUSY ;
ioc = current - > io_context ;
if ( ! ioc )
return - ENOENT ;
/* acquire active ref on @ioc and associate */
get_io_context_active ( ioc ) ;
bio - > bi_ioc = ioc ;
/* associate blkcg if exists */
rcu_read_lock ( ) ;
css = task_subsys_state ( current , blkio_subsys_id ) ;
if ( css & & css_tryget ( css ) )
bio - > bi_css = css ;
rcu_read_unlock ( ) ;
return 0 ;
}
/**
* bio_disassociate_task - undo bio_associate_current ( )
* @ bio : target bio
*/
void bio_disassociate_task ( struct bio * bio )
{
if ( bio - > bi_ioc ) {
put_io_context ( bio - > bi_ioc ) ;
bio - > bi_ioc = NULL ;
}
if ( bio - > bi_css ) {
css_put ( bio - > bi_css ) ;
bio - > bi_css = NULL ;
}
}
# endif /* CONFIG_BLK_CGROUP */
2005-04-17 02:20:36 +04:00
/*
 * Create a slab cache for every bvec_slabs[] entry that holds more than
 * BIO_INLINE_VECS vectors; smaller entries get no slab (slab = NULL).
 */
static void __init biovec_init_slabs(void)
{
	int idx;

	for (idx = 0; idx < BIOVEC_NR_POOLS; idx++) {
		struct biovec_slab *bvs = &bvec_slabs[idx];
		int size;

		if (bvs->nr_vecs > BIO_INLINE_VECS) {
			size = bvs->nr_vecs * sizeof(struct bio_vec);
			bvs->slab = kmem_cache_create(bvs->name, size, 0,
					SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
		} else {
			bvs->slab = NULL;
		}
	}
}
/*
 * Boot-time setup for the bio layer, registered through subsys_initcall().
 * Allocates the bio_slabs table, initialises bio integrity and the biovec
 * slabs, then creates fs_bio_set, its integrity pool and bio_split_pool.
 * Every failure here ends in panic(); on success the function returns 0.
 */
static int __init init_bio ( void )
{
2008-12-10 17:35:05 +03:00
/* start with room for two bio_slab entries, none of them in use */
bio_slab_max = 2 ;
bio_slab_nr = 0 ;
bio_slabs = kzalloc ( bio_slab_max * sizeof ( struct bio_slab ) , GFP_KERNEL ) ;
if ( ! bio_slabs )
panic ( " bio: can't allocate bios \n " ) ;
2005-04-17 02:20:36 +04:00
2009-06-26 17:37:49 +04:00
bio_integrity_init ( ) ;
2005-04-17 02:20:36 +04:00
biovec_init_slabs ( ) ;
2008-12-10 17:35:05 +03:00
/* bio_set with BIO_POOL_SIZE reserved entries and no front padding */
fs_bio_set = bioset_create ( BIO_POOL_SIZE , 0 ) ;
2005-04-17 02:20:36 +04:00
if ( ! fs_bio_set )
panic ( " bio: can't allocate bios \n " ) ;
2011-03-17 13:11:05 +03:00
if ( bioset_integrity_create ( fs_bio_set , BIO_POOL_SIZE ) )
panic ( " bio: can't create integrity pool \n " ) ;
2006-03-26 13:37:47 +04:00
/* pool of struct bio_pair, the pool bio_split() allocates from */
bio_split_pool = mempool_create_kmalloc_pool ( BIO_SPLIT_ENTRIES ,
sizeof ( struct bio_pair ) ) ;
2005-04-17 02:20:36 +04:00
if ( ! bio_split_pool )
panic ( " bio: can't create split pool \n " ) ;
return 0 ;
}
subsys_initcall ( init_bio ) ;