/*
 * Main bcache entry point - handle a read or a write request and decide what to
 * do with it; the make_request functions are called by the block layer.
 *
 * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
 * Copyright 2012 Google, Inc.
 */

#include "bcache.h"
#include "btree.h"
#include "debug.h"
#include "request.h"
#include "writeback.h"

#include <linux/module.h>
#include <linux/hash.h>
#include <linux/random.h>
#include <linux/backing-dev.h>

#include <trace/events/bcache.h>

#define CUTOFF_CACHE_ADD	95
#define CUTOFF_CACHE_READA	90

struct kmem_cache *bch_search_cache;

static void bch_data_insert_start(struct closure *);

static unsigned cache_mode(struct cached_dev *dc, struct bio *bio)
{
	return BDEV_CACHE_MODE(&dc->sb);
}

static bool verify(struct cached_dev *dc, struct bio *bio)
{
	return dc->verify;
}

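/*
 * Compute a 64-bit CRC over the bio's data and stash it in the key, in the
 * u64 slot immediately after the last bucket pointer; the high bit is
 * masked off.
 */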
static void bio_csum(struct bio *bio, struct bkey *k)
{
	struct bio_vec bv;
	struct bvec_iter iter;
	uint64_t csum = 0;

	bio_for_each_segment(bv, bio, iter) {
		void *d = kmap(bv.bv_page) + bv.bv_offset;
		csum = bch_crc64_update(csum, d, bv.bv_len);
		kunmap(bv.bv_page);
	}

	k->ptr[KEY_PTRS(k)] = csum & (~0ULL >> 1);
}

/* Insert data into cache */
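
/*
 * Runs after the data itself has been written (or invalidated): journal the
 * accumulated keys (journaling is skipped for replace operations) and insert
 * them into the btree. If more data remains to be written, loop back to
 * bch_data_insert_start().
 */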
static void bch_data_insert_keys(struct closure *cl)
{
	struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
	atomic_t *journal_ref = NULL;
	struct bkey *replace_key = op->replace ? &op->replace_key : NULL;
	int ret;

	/*
	 * If we're looping, might already be waiting on
	 * another journal write - can't wait on more than one journal write at
	 * a time
	 *
	 * XXX: this looks wrong
	 */
#if 0
	while (atomic_read(&s->cl.remaining) & CLOSURE_WAITING)
		closure_sync(&s->cl);
#endif

	if (!op->replace)
		journal_ref = bch_journal(op->c, &op->insert_keys,
					  op->flush_journal ? cl : NULL);

	ret = bch_btree_insert(op->c, &op->insert_keys,
			       journal_ref, replace_key);
	if (ret == -ESRCH) {
		op->replace_collision = true;
	} else if (ret) {
		op->error		= -ENOMEM;
		op->insert_data_done	= true;
	}

	if (journal_ref)
		atomic_dec_bug(journal_ref);

	if (!op->insert_data_done) {
		continue_at(cl, bch_data_insert_start, op->wq);
		return;
	}

	bch_keylist_free(&op->insert_keys);
	closure_return(cl);
}

static int bch_keylist_realloc(struct keylist *l, unsigned u64s,
			       struct cache_set *c)
{
	size_t oldsize = bch_keylist_nkeys(l);
	size_t newsize = oldsize + u64s;

	/*
	 * The journalling code doesn't handle the case where the keys to insert
	 * are bigger than an empty write: If we just return -ENOMEM here,
	 * bio_insert() and bio_invalidate() will insert the keys created so far
	 * and finish the rest when the keylist is empty.
	 */
	if (newsize * sizeof(uint64_t) > block_bytes(c) - sizeof(struct jset))
		return -ENOMEM;

	return __bch_keylist_realloc(l, u64s);
}

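/*
 * Bypass path: rather than writing the data to the cache, append keys with
 * no pointers covering the bio's range; inserting those into the btree
 * invalidates whatever the cache currently holds for that region.
 */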
static void bch_data_invalidate(struct closure *cl)
{
	struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
	struct bio *bio = op->bio;

	pr_debug("invalidating %i sectors from %llu",
		 bio_sectors(bio), (uint64_t) bio->bi_iter.bi_sector);

	while (bio_sectors(bio)) {
		unsigned sectors = min(bio_sectors(bio),
				       1U << (KEY_SIZE_BITS - 1));

		if (bch_keylist_realloc(&op->insert_keys, 2, op->c))
			goto out;

		bio->bi_iter.bi_sector	+= sectors;
		bio->bi_iter.bi_size	-= sectors << 9;

		bch_keylist_add(&op->insert_keys,
				&KEY(op->inode, bio->bi_iter.bi_sector, sectors));
	}

	op->insert_data_done = true;
	bio_put(bio);
out:
	continue_at(cl, bch_data_insert_keys, op->wq);
}

static void bch_data_insert_error(struct closure *cl)
{
	struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);

	/*
	 * Our data write just errored, which means we've got a bunch of keys to
	 * insert that point to data that wasn't successfully written.
	 *
	 * We don't have to insert those keys but we still have to invalidate
	 * that region of the cache - so, if we just strip off all the pointers
	 * from the keys we'll accomplish just that.
	 */

	struct bkey *src = op->insert_keys.keys, *dst = op->insert_keys.keys;

	while (src != op->insert_keys.top) {
		struct bkey *n = bkey_next(src);

		SET_KEY_PTRS(src, 0);
		memmove(dst, src, bkey_bytes(src));

		dst = bkey_next(dst);
		src = n;
	}

	op->insert_keys.top = dst;

	bch_data_insert_keys(cl);
}

static void bch_data_insert_endio(struct bio *bio)
{
	struct closure *cl = bio->bi_private;
	struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);

	if (bio->bi_error) {
		/* TODO: We could try to recover from this. */
		if (op->writeback)
			op->error = bio->bi_error;
		else if (!op->replace)
			set_closure_fn(cl, bch_data_insert_error, op->wq);
		else
			set_closure_fn(cl, NULL, NULL);
	}

	bch_bbio_endio(op->c, bio, bio->bi_error, "writing data to cache");
}

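/*
 * Write the data out to the cache: repeatedly allocate as many sectors as
 * bch_alloc_sectors() will give us, split off that much of the bio, build a
 * key pointing at the allocated space and submit the fragment, until the
 * whole bio has been consumed (or, on allocation failure, fall back to
 * invalidating the rest of the write).
 */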
static void bch_data_insert_start(struct closure *cl)
{
	struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
	struct bio *bio = op->bio, *n;

	if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) {
		set_gc_sectors(op->c);
		wake_up_gc(op->c);
	}

	if (op->bypass)
		return bch_data_invalidate(cl);

	/*
	 * Journal writes are marked REQ_PREFLUSH; if the original write was a
	 * flush, it'll wait on the journal write.
	 */
	bio->bi_opf &= ~(REQ_PREFLUSH|REQ_FUA);

	do {
		unsigned i;
		struct bkey *k;
		struct bio_set *split = op->c->bio_split;

		/* 1 for the device pointer and 1 for the chksum */
		if (bch_keylist_realloc(&op->insert_keys,
					3 + (op->csum ? 1 : 0),
					op->c)) {
			continue_at(cl, bch_data_insert_keys, op->wq);
			return;
		}

		k = op->insert_keys.top;
		bkey_init(k);
		SET_KEY_INODE(k, op->inode);
		SET_KEY_OFFSET(k, bio->bi_iter.bi_sector);

		if (!bch_alloc_sectors(op->c, k, bio_sectors(bio),
				       op->write_point, op->write_prio,
				       op->writeback))
			goto err;

		n = bio_next_split(bio, KEY_SIZE(k), GFP_NOIO, split);

		n->bi_end_io	= bch_data_insert_endio;
		n->bi_private	= cl;

		if (op->writeback) {
			SET_KEY_DIRTY(k, true);

			for (i = 0; i < KEY_PTRS(k); i++)
				SET_GC_MARK(PTR_BUCKET(op->c, k, i),
					    GC_MARK_DIRTY);
		}

		SET_KEY_CSUM(k, op->csum);
		if (KEY_CSUM(k))
			bio_csum(n, k);

		trace_bcache_cache_insert(k);
		bch_keylist_push(&op->insert_keys);

		bio_set_op_attrs(n, REQ_OP_WRITE, 0);
		bch_submit_bbio(n, op->c, k, 0);
	} while (n != bio);

	op->insert_data_done = true;
	continue_at(cl, bch_data_insert_keys, op->wq);
	return;
err:
	/* bch_alloc_sectors() blocks if s->writeback = true */
	BUG_ON(op->writeback);

	/*
	 * But if it's not a writeback write we'd rather just bail out if
	 * there aren't any buckets ready to write to - it might take a while
	 * and we might be starving btree writes for gc or something.
	 */

	if (!op->replace) {
		/*
		 * Writethrough write: We can't complete the write until we've
		 * updated the index. But we don't want to delay the write while
		 * we wait for buckets to be freed up, so just invalidate the
		 * rest of the write.
		 */
		op->bypass = true;
		return bch_data_invalidate(cl);
	} else {
		/*
		 * From a cache miss, we can just insert the keys for the data
		 * we have written or bail out if we didn't do anything.
		 */
		op->insert_data_done = true;
		bio_put(bio);

		if (!bch_keylist_empty(&op->insert_keys))
			continue_at(cl, bch_data_insert_keys, op->wq);
		else
			closure_return(cl);
	}
}

/**
 * bch_data_insert - stick some data in the cache
 *
 * This is the starting point for any data to end up in a cache device; it could
 * be from a normal write, or a writeback write, or a write to a flash only
 * volume - it's also used by the moving garbage collector to compact data in
 * mostly empty buckets.
 *
 * It first writes the data to the cache, creating a list of keys to be inserted
 * (if the data had to be fragmented there will be multiple keys); after the
 * data is written it calls bch_journal, and after the keys have been added to
 * the next journal write they're inserted into the btree.
 *
 * It inserts the data in s->cache_bio; bi_sector is used for the key offset,
 * and op->inode is used for the key inode.
 *
 * If s->bypass is true, instead of inserting the data it invalidates the
 * region of the cache represented by s->cache_bio and op->inode.
 */
void bch_data_insert(struct closure *cl)
{
	struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);

	trace_bcache_write(op->c, op->inode, op->bio,
			   op->writeback, op->bypass);

	bch_keylist_init(&op->insert_keys);
	bio_get(op->bio);
	bch_data_insert_start(cl);
}

/* Congested? */
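
/*
 * Returns 0 when the cache set is not considered congested; otherwise a small
 * positive value that check_should_bypass() compares against the task's
 * sequential I/O (in sectors) to decide whether to bypass the cache.
 */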
unsigned bch_get_congested(struct cache_set *c)
{
	int i;
	long rand;

	if (!c->congested_read_threshold_us &&
	    !c->congested_write_threshold_us)
		return 0;

	i = (local_clock_us() - c->congested_last_us) / 1024;
	if (i < 0)
		return 0;

	i += atomic_read(&c->congested);
	if (i >= 0)
		return 0;

	i += CONGESTED_MAX;

	if (i > 0)
		i = fract_exp_two(i, 6);

	rand = get_random_int();
	i -= bitmap_weight(&rand, BITS_PER_LONG);

	return i > 0 ? i : 1;
}

static void add_sequential(struct task_struct *t)
{
	ewma_add(t->sequential_io_avg,
		 t->sequential_io, 8, 0);

	t->sequential_io = 0;
}

static struct hlist_head *iohash(struct cached_dev *dc, uint64_t k)
{
	return &dc->io_hash[hash_64(k, RECENT_IO_BITS)];
}

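/*
 * Decide whether this request should bypass the cache entirely: bypass when
 * the device is detaching, the cache is nearly full, the I/O is a discard or
 * is misaligned, or when the per-task sequential I/O tracking (via dc->io_hash
 * and dc->io_lru below) shows a long sequential stream or the cache device is
 * congested.
 */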
static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
{
	struct cache_set *c = dc->disk.c;
	unsigned mode = cache_mode(dc, bio);
	unsigned sectors, congested = bch_get_congested(c);
	struct task_struct *task = current;
	struct io *i;

	if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
	    c->gc_stats.in_use > CUTOFF_CACHE_ADD ||
	    (bio_op(bio) == REQ_OP_DISCARD))
		goto skip;

	if (mode == CACHE_MODE_NONE ||
	    (mode == CACHE_MODE_WRITEAROUND &&
	     op_is_write(bio_op(bio))))
		goto skip;

	if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) ||
	    bio_sectors(bio) & (c->sb.block_size - 1)) {
		pr_debug("skipping unaligned io");
		goto skip;
	}

	if (bypass_torture_test(dc)) {
		if ((get_random_int() & 3) == 3)
			goto skip;
		else
			goto rescale;
	}

	if (!congested && !dc->sequential_cutoff)
		goto rescale;

	if (!congested &&
	    mode == CACHE_MODE_WRITEBACK &&
	    op_is_write(bio->bi_opf) &&
	    op_is_sync(bio->bi_opf))
		goto rescale;

	spin_lock(&dc->io_lock);

	hlist_for_each_entry(i, iohash(dc, bio->bi_iter.bi_sector), hash)
		if (i->last == bio->bi_iter.bi_sector &&
		    time_before(jiffies, i->jiffies))
			goto found;

	i = list_first_entry(&dc->io_lru, struct io, lru);

	add_sequential(task);
	i->sequential = 0;
found:
	if (i->sequential + bio->bi_iter.bi_size > i->sequential)
		i->sequential	+= bio->bi_iter.bi_size;

	i->last			 = bio_end_sector(bio);
	i->jiffies		 = jiffies + msecs_to_jiffies(5000);
	task->sequential_io	 = i->sequential;

	hlist_del(&i->hash);
	hlist_add_head(&i->hash, iohash(dc, i->last));
	list_move_tail(&i->lru, &dc->io_lru);

	spin_unlock(&dc->io_lock);

	sectors = max(task->sequential_io,
		      task->sequential_io_avg) >> 9;

	if (dc->sequential_cutoff &&
	    sectors >= dc->sequential_cutoff >> 9) {
		trace_bcache_bypass_sequential(bio);
		goto skip;
	}

	if (congested && sectors >= congested) {
		trace_bcache_bypass_congested(bio);
		goto skip;
	}

rescale:
	bch_rescale_priorities(c, bio_sectors(bio));
	return false;
skip:
	bch_mark_sectors_bypassed(c, dc, bio_sectors(bio));
	return true;
}

/* Cache lookup */

struct search {
	/* Stack frame for bio_complete */
	struct closure		cl;

	struct bbio		bio;
	struct bio		*orig_bio;
	struct bio		*cache_miss;
	struct bcache_device	*d;

	unsigned		insert_bio_sectors;
	unsigned		recoverable:1;
	unsigned		write:1;
	unsigned		read_dirty_data:1;

	unsigned long		start_time;

	struct btree_op		op;
	struct data_insert_op	iop;
};

static void bch_cache_read_endio(struct bio *bio)
{
	struct bbio *b = container_of(bio, struct bbio, bio);
	struct closure *cl = bio->bi_private;
	struct search *s = container_of(cl, struct search, cl);

	/*
	 * If the bucket was reused while our bio was in flight, we might have
	 * read the wrong data. Set s->error but not error so it doesn't get
	 * counted against the cache device, but we'll still reread the data
	 * from the backing device.
	 */

	if (bio->bi_error)
		s->iop.error = bio->bi_error;
	else if (!KEY_DIRTY(&b->key) &&
		 ptr_stale(s->iop.c, &b->key, 0)) {
		atomic_long_inc(&s->iop.c->cache_read_races);
		s->iop.error = -EINTR;
	}

	bch_bbio_endio(s->iop.c, bio, bio->bi_error, "reading from cache");
}

/*
 * Read from a single key, handling the initial cache miss if the key starts in
 * the middle of the bio
 */
static int cache_lookup_fn(struct btree_op *op, struct btree *b, struct bkey *k)
{
	struct search *s = container_of(op, struct search, op);
	struct bio *n, *bio = &s->bio.bio;
	struct bkey *bio_key;
	unsigned ptr;

	if (bkey_cmp(k, &KEY(s->iop.inode, bio->bi_iter.bi_sector, 0)) <= 0)
		return MAP_CONTINUE;

	if (KEY_INODE(k) != s->iop.inode ||
	    KEY_START(k) > bio->bi_iter.bi_sector) {
		unsigned bio_sectors = bio_sectors(bio);
		unsigned sectors = KEY_INODE(k) == s->iop.inode
			? min_t(uint64_t, INT_MAX,
				KEY_START(k) - bio->bi_iter.bi_sector)
			: INT_MAX;

		int ret = s->d->cache_miss(b, s, bio, sectors);
		if (ret != MAP_CONTINUE)
			return ret;

		/* if this was a complete miss we shouldn't get here */
		BUG_ON(bio_sectors <= sectors);
	}

	if (!KEY_SIZE(k))
		return MAP_CONTINUE;

	/* XXX: figure out best pointer - for multiple cache devices */
	ptr = 0;

	PTR_BUCKET(b->c, k, ptr)->prio = INITIAL_PRIO;

	if (KEY_DIRTY(k))
		s->read_dirty_data = true;

	n = bio_next_split(bio, min_t(uint64_t, INT_MAX,
				      KEY_OFFSET(k) - bio->bi_iter.bi_sector),
			   GFP_NOIO, s->d->bio_split);

	bio_key = &container_of(n, struct bbio, bio)->key;
	bch_bkey_copy_single_ptr(bio_key, k, ptr);

	bch_cut_front(&KEY(s->iop.inode, n->bi_iter.bi_sector, 0), bio_key);
	bch_cut_back(&KEY(s->iop.inode, bio_end_sector(n), 0), bio_key);

	n->bi_end_io	= bch_cache_read_endio;
	n->bi_private	= &s->cl;

	/*
	 * The bucket we're reading from might be reused while our bio
	 * is in flight, and we could then end up reading the wrong
	 * data.
	 *
	 * We guard against this by checking (in cache_read_endio()) if
	 * the pointer is stale again; if so, we treat it as an error
	 * and reread from the backing device (but we don't pass that
	 * error up anywhere).
	 */

	__bch_submit_bbio(n, b->c);
	return n == bio ? MAP_DONE : MAP_CONTINUE;
}

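/*
 * Walk the btree over the bio's range with cache_lookup_fn(); ranges not
 * found in the cache are handed to the device's cache_miss callback. If the
 * btree walk returns -EAGAIN, the lookup is retried from the bcache
 * workqueue.
 */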
static void cache_lookup(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, iop.cl);
	struct bio *bio = &s->bio.bio;
	int ret;

	bch_btree_op_init(&s->op, -1);

	ret = bch_btree_map_keys(&s->op, s->iop.c,
				 &KEY(s->iop.inode, bio->bi_iter.bi_sector, 0),
				 cache_lookup_fn, MAP_END_KEY);
	if (ret == -EAGAIN) {
		continue_at(cl, cache_lookup, bcache_wq);
		return;
	}

	closure_return(cl);
}

/* Common code for the make_request functions */

static void request_endio(struct bio *bio)
{
	struct closure *cl = bio->bi_private;

	if (bio->bi_error) {
		struct search *s = container_of(cl, struct search, cl);
		s->iop.error = bio->bi_error;
		/* Only cache read errors are recoverable */
		s->recoverable = false;
	}

	bio_put(bio);
	closure_put(cl);
}

static void bio_complete(struct search *s)
{
	if (s->orig_bio) {
		generic_end_io_acct(bio_data_dir(s->orig_bio),
				    &s->d->disk->part0, s->start_time);

		trace_bcache_request_end(s->d, s->orig_bio);
		s->orig_bio->bi_error = s->iop.error;
		bio_endio(s->orig_bio);
		s->orig_bio = NULL;
	}
}

static void do_bio_hook(struct search *s, struct bio *orig_bio)
{
	struct bio *bio = &s->bio.bio;

	bio_init(bio);
	__bio_clone_fast(bio, orig_bio);
	bio->bi_end_io		= request_endio;
	bio->bi_private		= &s->cl;

	bio_cnt_set(bio, 3);
}

static void search_free(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, cl);
	bio_complete(s);

	if (s->iop.bio)
		bio_put(s->iop.bio);

	closure_debug_destroy(cl);
	mempool_free(s, s->d->c->search);
}

static inline struct search *search_alloc(struct bio *bio,
					  struct bcache_device *d)
{
	struct search *s;

	s = mempool_alloc(d->c->search, GFP_NOIO);

	closure_init(&s->cl, NULL);
	do_bio_hook(s, bio);

	s->orig_bio		= bio;
	s->cache_miss		= NULL;
	s->d			= d;
	s->recoverable		= 1;
	s->write		= op_is_write(bio_op(bio));
	s->read_dirty_data	= 0;
	s->start_time		= jiffies;

	s->iop.c		= d->c;
	s->iop.bio		= NULL;
	s->iop.inode		= d->id;
	s->iop.write_point	= hash_long((unsigned long) current, 16);
	s->iop.write_prio	= 0;
	s->iop.error		= 0;
	s->iop.flags		= 0;
	s->iop.flush_journal	= (bio->bi_opf & (REQ_PREFLUSH|REQ_FUA)) != 0;
	s->iop.wq		= bcache_wq;

	return s;
}

/* Cached devices */

static void cached_dev_bio_complete(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, cl);
	struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);

	search_free(cl);
	cached_dev_put(dc);
}

/* Process reads */

static void cached_dev_cache_miss_done(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, cl);

	if (s->iop.replace_collision)
		bch_mark_cache_miss_collision(s->iop.c, s->d);

	if (s->iop.bio)
		bio_free_pages(s->iop.bio);

	cached_dev_bio_complete(cl);
}

static void cached_dev_read_error(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, cl);
	struct bio *bio = &s->bio.bio;

	if (s->recoverable) {
		/* Retry from the backing device: */
		trace_bcache_read_retry(s->orig_bio);

		s->iop.error = 0;
		do_bio_hook(s, s->orig_bio);

		/* XXX: invalidate cache */

		closure_bio_submit(bio, cl);
	}

	continue_at(cl, cached_dev_cache_miss_done, NULL);
}

static void cached_dev_read_done(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, cl);
	struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);

	/*
	 * We had a cache miss; cache_bio now contains data ready to be inserted
	 * into the cache.
	 *
	 * First, we copy the data we just read from cache_bio's bounce buffers
	 * to the buffers the original bio pointed to:
	 */

	if (s->iop.bio) {
		bio_reset(s->iop.bio);
		s->iop.bio->bi_iter.bi_sector = s->cache_miss->bi_iter.bi_sector;
		s->iop.bio->bi_bdev = s->cache_miss->bi_bdev;
		s->iop.bio->bi_iter.bi_size = s->insert_bio_sectors << 9;
		bch_bio_map(s->iop.bio, NULL);

		bio_copy_data(s->cache_miss, s->iop.bio);

		bio_put(s->cache_miss);
		s->cache_miss = NULL;
	}

	if (verify(dc, &s->bio.bio) && s->recoverable && !s->read_dirty_data)
		bch_data_verify(dc, s->orig_bio);

	bio_complete(s);

	if (s->iop.bio &&
	    !test_bit(CACHE_SET_STOPPING, &s->iop.c->flags)) {
		BUG_ON(!s->iop.replace);
		closure_call(&s->iop.cl, bch_data_insert, NULL, cl);
	}

	continue_at(cl, cached_dev_cache_miss_done, NULL);
}

static void cached_dev_read_done_bh(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, cl);
	struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);

	bch_mark_cache_accounting(s->iop.c, s->d,
				  !s->cache_miss, s->iop.bypass);
	trace_bcache_read(s->orig_bio, !s->cache_miss, s->iop.bypass);

	if (s->iop.error)
		continue_at_nobarrier(cl, cached_dev_read_error, bcache_wq);
	else if (s->iop.bio || verify(dc, &s->bio.bio))
		continue_at_nobarrier(cl, cached_dev_read_done, bcache_wq);
	else
		continue_at_nobarrier(cl, cached_dev_bio_complete, NULL);
}

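/*
 * Handle the portion of a read the cache couldn't satisfy: reserve the range
 * with a replace key (so the later insert can detect whether it raced with
 * another write), read the missed range plus optional readahead from the
 * backing device into a bounce bio, and let cached_dev_read_done() copy the
 * data back and insert it into the cache. If the bounce bio can't be
 * allocated, just read the miss from the backing device.
 */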
static int cached_dev_cache_miss(struct btree *b, struct search *s,
				 struct bio *bio, unsigned sectors)
{
	int ret = MAP_CONTINUE;
	unsigned reada = 0;
	struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
	struct bio *miss, *cache_bio;

	if (s->cache_miss || s->iop.bypass) {
		miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split);
		ret = miss == bio ? MAP_DONE : MAP_CONTINUE;
		goto out_submit;
	}

	if (!(bio->bi_opf & REQ_RAHEAD) &&
	    !(bio->bi_opf & REQ_META) &&
	    s->iop.c->gc_stats.in_use < CUTOFF_CACHE_READA)
		reada = min_t(sector_t, dc->readahead >> 9,
			      bdev_sectors(bio->bi_bdev) - bio_end_sector(bio));

	s->insert_bio_sectors = min(sectors, bio_sectors(bio) + reada);

	s->iop.replace_key = KEY(s->iop.inode,
				 bio->bi_iter.bi_sector + s->insert_bio_sectors,
				 s->insert_bio_sectors);

	ret = bch_btree_insert_check_key(b, &s->op, &s->iop.replace_key);
	if (ret)
		return ret;

	s->iop.replace = true;

	miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split);

	/* btree_search_recurse()'s btree iterator is no good anymore */
	ret = miss == bio ? MAP_DONE : -EINTR;

	cache_bio = bio_alloc_bioset(GFP_NOWAIT,
			DIV_ROUND_UP(s->insert_bio_sectors, PAGE_SECTORS),
			dc->disk.bio_split);
	if (!cache_bio)
		goto out_submit;

	cache_bio->bi_iter.bi_sector	= miss->bi_iter.bi_sector;
	cache_bio->bi_bdev		= miss->bi_bdev;
	cache_bio->bi_iter.bi_size	= s->insert_bio_sectors << 9;

	cache_bio->bi_end_io	= request_endio;
	cache_bio->bi_private	= &s->cl;

	bch_bio_map(cache_bio, NULL);
	if (bio_alloc_pages(cache_bio, __GFP_NOWARN|GFP_NOIO))
		goto out_put;

	if (reada)
		bch_mark_cache_readahead(s->iop.c, s->d);

	s->cache_miss	= miss;
	s->iop.bio	= cache_bio;
	bio_get(cache_bio);
	closure_bio_submit(cache_bio, &s->cl);

	return ret;
out_put:
	bio_put(cache_bio);
out_submit:
	miss->bi_end_io		= request_endio;
	miss->bi_private	= &s->cl;
	closure_bio_submit(miss, &s->cl);
	return ret;
}

static void cached_dev_read(struct cached_dev *dc, struct search *s)
{
	struct closure *cl = &s->cl;

	closure_call(&s->iop.cl, cache_lookup, NULL, cl);
	continue_at(cl, cached_dev_read_done_bh, NULL);
}

/* Process writes */

static void cached_dev_write_complete(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, cl);
	struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);

	up_read_non_owner(&dc->writeback_lock);
	cached_dev_bio_complete(cl);
}

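/*
 * Writes take one of three paths depending on s->iop.bypass/writeback:
 * bypass writes go straight to the backing device and the cached range is
 * invalidated; writeback writes go only to the cache (plus a flush to the
 * backing device if the original bio asked for one); everything else is
 * writethrough - the bio is cloned so it can be written to both the cache
 * and the backing device.
 */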
static void cached_dev_write(struct cached_dev *dc, struct search *s)
{
	struct closure *cl = &s->cl;
	struct bio *bio = &s->bio.bio;
	struct bkey start = KEY(dc->disk.id, bio->bi_iter.bi_sector, 0);
	struct bkey end = KEY(dc->disk.id, bio_end_sector(bio), 0);

	bch_keybuf_check_overlapping(&s->iop.c->moving_gc_keys, &start, &end);

	down_read_non_owner(&dc->writeback_lock);
	if (bch_keybuf_check_overlapping(&dc->writeback_keys, &start, &end)) {
		/*
		 * We overlap with some dirty data undergoing background
		 * writeback, force this write to writeback
		 */
		s->iop.bypass = false;
		s->iop.writeback = true;
	}

	/*
	 * Discards aren't _required_ to do anything, so skipping if
	 * check_overlapping returned true is ok
	 *
	 * But check_overlapping drops dirty keys for which io hasn't started,
	 * so we still want to call it.
	 */
	if (bio_op(bio) == REQ_OP_DISCARD)
		s->iop.bypass = true;

	if (should_writeback(dc, s->orig_bio,
			     cache_mode(dc, bio),
			     s->iop.bypass)) {
		s->iop.bypass = false;
		s->iop.writeback = true;
	}

	if (s->iop.bypass) {
		s->iop.bio = s->orig_bio;
		bio_get(s->iop.bio);

		if ((bio_op(bio) != REQ_OP_DISCARD) ||
		    blk_queue_discard(bdev_get_queue(dc->bdev)))
			closure_bio_submit(bio, cl);
	} else if (s->iop.writeback) {
		bch_writeback_add(dc);
		s->iop.bio = bio;

		if (bio->bi_opf & REQ_PREFLUSH) {
			/* Also need to send a flush to the backing device */
			struct bio *flush = bio_alloc_bioset(GFP_NOIO, 0,
							     dc->disk.bio_split);

			flush->bi_bdev	= bio->bi_bdev;
			flush->bi_end_io = request_endio;
			flush->bi_private = cl;
			bio_set_op_attrs(flush, REQ_OP_WRITE, WRITE_FLUSH);

			closure_bio_submit(flush, cl);
		}
	} else {
		s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split);

		closure_bio_submit(bio, cl);
	}

	closure_call(&s->iop.cl, bch_data_insert, NULL, cl);
	continue_at(cl, cached_dev_write_complete, NULL);
}

static void cached_dev_nodata(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, cl);
	struct bio *bio = &s->bio.bio;

	if (s->iop.flush_journal)
		bch_journal_meta(s->iop.c, cl);

	/* If it's a flush, we send the flush to the backing device too */
	closure_bio_submit(bio, cl);

	continue_at(cl, cached_dev_bio_complete, NULL);
}

/* Cached devices - read & write stuff */

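/*
 * Make_request entry point for a cached device: account the I/O, redirect the
 * bio to the backing device (shifted by the superblock's data_offset), and,
 * while the cache is attached, dispatch to the nodata/read/write paths;
 * otherwise pass the bio straight through.
 */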
static blk_qc_t cached_dev_make_request(struct request_queue *q,
					struct bio *bio)
{
	struct search *s;
	struct bcache_device *d = bio->bi_bdev->bd_disk->private_data;
	struct cached_dev *dc = container_of(d, struct cached_dev, disk);
	int rw = bio_data_dir(bio);

	generic_start_io_acct(rw, bio_sectors(bio), &d->disk->part0);

	bio->bi_bdev = dc->bdev;
	bio->bi_iter.bi_sector += dc->sb.data_offset;

	if (cached_dev_get(dc)) {
		s = search_alloc(bio, d);
		trace_bcache_request_start(s->d, bio);

		if (!bio->bi_iter.bi_size) {
			/*
			 * can't call bch_journal_meta from under
			 * generic_make_request
			 */
			continue_at_nobarrier(&s->cl,
					      cached_dev_nodata,
					      bcache_wq);
		} else {
			s->iop.bypass = check_should_bypass(dc, bio);

			if (rw)
				cached_dev_write(dc, s);
			else
				cached_dev_read(dc, s);
		}
	} else {
		if ((bio_op(bio) == REQ_OP_DISCARD) &&
		    !blk_queue_discard(bdev_get_queue(dc->bdev)))
			bio_endio(bio);
		else
			generic_make_request(bio);
	}

	return BLK_QC_T_NONE;
}

static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode,
			    unsigned int cmd, unsigned long arg)
{
	struct cached_dev *dc = container_of(d, struct cached_dev, disk);
	return __blkdev_driver_ioctl(dc->bdev, mode, cmd, arg);
}

static int cached_dev_congested(void *data, int bits)
{
	struct bcache_device *d = data;
	struct cached_dev *dc = container_of(d, struct cached_dev, disk);
	struct request_queue *q = bdev_get_queue(dc->bdev);
	int ret = 0;

	if (bdi_congested(&q->backing_dev_info, bits))
		return 1;

	if (cached_dev_get(dc)) {
		unsigned i;
		struct cache *ca;

		for_each_cache(ca, d->c, i) {
			q = bdev_get_queue(ca->bdev);
			ret |= bdi_congested(&q->backing_dev_info, bits);
		}

		cached_dev_put(dc);
	}

	return ret;
}

void bch_cached_dev_request_init(struct cached_dev *dc)
{
	struct gendisk *g = dc->disk.disk;

	g->queue->make_request_fn		= cached_dev_make_request;
	g->queue->backing_dev_info.congested_fn = cached_dev_congested;
	dc->disk.cache_miss			= cached_dev_cache_miss;
	dc->disk.ioctl				= cached_dev_ioctl;
}

/* Flash backed devices */
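
/*
 * Flash-only volumes have no backing device, so a cache miss just means the
 * sectors were never written: zero-fill the missed part of the bio and
 * advance past it.
 */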
static int flash_dev_cache_miss(struct btree *b, struct search *s,
				struct bio *bio, unsigned sectors)
{
	unsigned bytes = min(sectors, bio_sectors(bio)) << 9;

	swap(bio->bi_iter.bi_size, bytes);
	zero_fill_bio(bio);
	swap(bio->bi_iter.bi_size, bytes);

	bio_advance(bio, bytes);

	if (!bio->bi_iter.bi_size)
		return MAP_DONE;

	return MAP_CONTINUE;
}

static void flash_dev_nodata(struct closure *cl)
{
	struct search *s = container_of(cl, struct search, cl);

	if (s->iop.flush_journal)
		bch_journal_meta(s->iop.c, cl);

	continue_at(cl, search_free, NULL);
}

static blk_qc_t flash_dev_make_request(struct request_queue *q,
				       struct bio *bio)
{
	struct search *s;
	struct closure *cl;
	struct bcache_device *d = bio->bi_bdev->bd_disk->private_data;
	int rw = bio_data_dir(bio);

	generic_start_io_acct(rw, bio_sectors(bio), &d->disk->part0);

	s = search_alloc(bio, d);
	cl = &s->cl;
	bio = &s->bio.bio;

	trace_bcache_request_start(s->d, bio);

	if (!bio->bi_iter.bi_size) {
		/*
		 * can't call bch_journal_meta from under
		 * generic_make_request
		 */
		continue_at_nobarrier(&s->cl,
				      flash_dev_nodata,
				      bcache_wq);
		return BLK_QC_T_NONE;
	} else if (rw) {
		bch_keybuf_check_overlapping(&s->iop.c->moving_gc_keys,
					&KEY(d->id, bio->bi_iter.bi_sector, 0),
					&KEY(d->id, bio_end_sector(bio), 0));

		s->iop.bypass		= (bio_op(bio) == REQ_OP_DISCARD) != 0;
		s->iop.writeback	= true;
		s->iop.bio		= bio;

		closure_call(&s->iop.cl, bch_data_insert, NULL, cl);
	} else {
		closure_call(&s->iop.cl, cache_lookup, NULL, cl);
	}

	continue_at(cl, search_free, NULL);
	return BLK_QC_T_NONE;
}

static int flash_dev_ioctl(struct bcache_device *d, fmode_t mode,
			   unsigned int cmd, unsigned long arg)
{
	return -ENOTTY;
}

static int flash_dev_congested(void *data, int bits)
{
	struct bcache_device *d = data;
	struct request_queue *q;
	struct cache *ca;
	unsigned i;
	int ret = 0;

	for_each_cache(ca, d->c, i) {
		q = bdev_get_queue(ca->bdev);
		ret |= bdi_congested(&q->backing_dev_info, bits);
	}

	return ret;
}

void bch_flash_dev_request_init(struct bcache_device *d)
{
	struct gendisk *g = d->disk;

	g->queue->make_request_fn		= flash_dev_make_request;
	g->queue->backing_dev_info.congested_fn = flash_dev_congested;
	d->cache_miss				= flash_dev_cache_miss;
	d->ioctl				= flash_dev_ioctl;
}

void bch_request_exit(void)
{
	if (bch_search_cache)
		kmem_cache_destroy(bch_search_cache);
}

int __init bch_request_init(void)
{
	bch_search_cache = KMEM_CACHE(search, 0);
	if (!bch_search_cache)
		return -ENOMEM;

	return 0;
}