2018-01-30 15:13:48 +03:00
/*
* Copyright ( C ) 2018 Red Hat , Inc . All rights reserved .
*
* This file is part of LVM2 .
*
* This copyrighted material is made available to anyone wishing to use ,
* modify , copy , or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License v .2 .1 .
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program ; if not , write to the Free Software Foundation ,
* Inc . , 51 Franklin Street , Fifth Floor , Boston , MA 02110 - 1301 USA
*/
2018-05-14 12:30:20 +03:00
# include "lib/device/bcache.h"
2018-05-30 16:17:26 +03:00
# include "base/data-struct/radix-tree.h"
2018-05-16 15:43:02 +03:00
# include "lib/log/lvm-logging.h"
2018-05-14 12:30:20 +03:00
# include "lib/log/log.h"
2018-05-02 21:45:06 +03:00
2018-01-30 13:46:08 +03:00
# include <errno.h>
# include <fcntl.h>
# include <sys/stat.h>
# include <sys/types.h>
# include <stdbool.h>
# include <stdlib.h>
# include <stdio.h>
# include <stdint.h>
# include <libaio.h>
# include <unistd.h>
# include <linux/fs.h>
# include <sys/user.h>
/* log2 of the 512-byte sector size: shifting a sector count left by this yields a byte count. */
# define SECTOR_SHIFT 9L
bcache: use indirection table for fd
Add a "device index" (di) for each device, and use this
in the bcache api to the rest of lvm. This replaces the
file descriptor (fd) in the api. The rest of lvm uses
new functions bcache_set_fd(), bcache_clear_fd(), and
bcache_change_fd() to control which fd bcache uses for
io to a particular device.
. lvm opens a dev and gets an fd.
fd = open(dev);
. lvm passes fd to the bcache layer and gets a di
to use in the bcache api for the dev.
di = bcache_set_fd(fd);
. lvm uses bcache functions, passing di for the dev.
bcache_write_bytes(di, ...), etc.
. bcache translates di to fd to do io.
. lvm closes the device and clears the di/fd bcache state.
close(fd);
bcache_clear_fd(di);
In the bcache layer, a di-to-fd translation table
(int *_fd_table) is added. When bcache needs to
perform io on a di, it uses _fd_table[di].
In the following commit, lvm will make use of the new
bcache_change_fd() function to change the fd that
bcache uses for the dev, without dropping cached blocks.
2020-09-17 17:40:18 +03:00
/* NOTE(review): presumably the number of slots _fd_table grows by; the resize code is outside this view. */
# define FD_TABLE_INC 1024
/* NOTE(review): presumably the current capacity of _fd_table; confirm against the code that allocates it. */
static int _fd_table_size ;
/* Maps a device index (di) to the file descriptor used for io; the io engines read _fd_table[di]. */
static int * _fd_table ;
2018-01-30 13:46:08 +03:00
//----------------------------------------------------------------
2018-05-01 15:21:53 +03:00
/*
 * Logs a warning naming the failed call together with the strerror()
 * text for the current value of errno.
 */
static void log_sys_warn(const char *call)
{
	const char *reason = strerror(errno);

	log_warn(" WARNING: %s failed: %s. ", call, reason);
}
2018-01-30 13:46:08 +03:00
// Assumes the list is not empty.
static inline struct dm_list * _list_pop ( struct dm_list * head )
{
struct dm_list * l ;
l = head - > n ;
dm_list_del ( l ) ;
return l ;
}
//----------------------------------------------------------------
/* One async request slot: a libaio iocb plus the caller's completion context. */
struct control_block {
/* Links this block into a cb_set's free or allocated list. */
struct dm_list list ;
/* Caller-supplied value stored by _cb_alloc() and passed to the completion callback in _async_wait(). */
void * context ;
/* The request handed to io_submit(); _iocb_to_cb() maps a pointer to it back to this struct. */
struct iocb cb ;
} ;
struct cb_set {
struct dm_list free ;
struct dm_list allocated ;
2020-10-01 00:28:44 +03:00
struct control_block vec [ ] ;
2018-01-30 13:46:08 +03:00
} control_block_set ;
static struct cb_set * _cb_set_create ( unsigned nr )
{
2020-08-28 20:55:01 +03:00
unsigned i ;
2020-10-01 00:28:44 +03:00
struct cb_set * cbs = malloc ( sizeof ( * cbs ) + nr * sizeof ( * cbs - > vec ) ) ;
2018-01-30 13:46:08 +03:00
2021-03-09 21:35:53 +03:00
if ( ! cbs )
2018-01-30 13:46:08 +03:00
return NULL ;
dm_list_init ( & cbs - > free ) ;
dm_list_init ( & cbs - > allocated ) ;
for ( i = 0 ; i < nr ; i + + )
dm_list_add ( & cbs - > free , & cbs - > vec [ i ] . list ) ;
return cbs ;
}
2018-01-30 15:13:48 +03:00
static void _cb_set_destroy ( struct cb_set * cbs )
2018-01-30 13:46:08 +03:00
{
2018-01-30 15:13:48 +03:00
// We know this is always called after a wait_all. So there should
// never be in flight IO.
2018-01-30 13:46:08 +03:00
if ( ! dm_list_empty ( & cbs - > allocated ) ) {
2018-01-30 15:13:48 +03:00
// bail out
2021-02-07 16:06:12 +03:00
log_warn ( " WARNING: async io still in flight. " ) ;
2018-01-30 15:13:48 +03:00
return ;
2018-01-30 13:46:08 +03:00
}
2018-06-08 15:40:53 +03:00
free ( cbs ) ;
2018-01-30 13:46:08 +03:00
}
static struct control_block * _cb_alloc ( struct cb_set * cbs , void * context )
{
struct control_block * cb ;
if ( dm_list_empty ( & cbs - > free ) )
return NULL ;
cb = dm_list_item ( _list_pop ( & cbs - > free ) , struct control_block ) ;
cb - > context = context ;
dm_list_add ( & cbs - > allocated , & cb - > list ) ;
return cb ;
}
static void _cb_free ( struct cb_set * cbs , struct control_block * cb )
{
dm_list_del ( & cb - > list ) ;
dm_list_add_h ( & cbs - > free , & cb - > list ) ;
}
/* Recovers the enclosing control_block from a pointer to its embedded iocb. */
static struct control_block *_iocb_to_cb(struct iocb *icb)
{
	struct control_block *owner;

	owner = dm_list_struct_base(icb, struct control_block, cb);

	return owner;
}
//----------------------------------------------------------------
2018-02-05 19:04:23 +03:00
struct async_engine {
struct io_engine e ;
2018-01-30 13:46:08 +03:00
io_context_t aio_context ;
struct cb_set * cbs ;
2018-05-17 12:05:10 +03:00
unsigned page_mask ;
2018-01-30 13:46:08 +03:00
} ;
2018-02-05 19:04:23 +03:00
static struct async_engine * _to_async ( struct io_engine * e )
2018-01-30 13:46:08 +03:00
{
2018-02-05 19:04:23 +03:00
return container_of ( e , struct async_engine , e ) ;
2018-01-30 13:46:08 +03:00
}
2018-02-05 19:04:23 +03:00
static void _async_destroy ( struct io_engine * ioe )
2018-01-30 13:46:08 +03:00
{
2018-02-01 17:52:43 +03:00
int r ;
2018-02-05 19:04:23 +03:00
struct async_engine * e = _to_async ( ioe ) ;
2018-02-01 17:52:43 +03:00
2018-01-30 13:46:08 +03:00
_cb_set_destroy ( e - > cbs ) ;
2018-02-01 17:52:43 +03:00
// io_destroy is really slow
r = io_destroy ( e - > aio_context ) ;
if ( r )
log_sys_warn ( " io_destroy " ) ;
2018-06-08 15:40:53 +03:00
free ( e ) ;
2018-01-30 13:46:08 +03:00
}
bcache: use indirection table for fd
Add a "device index" (di) for each device, and use this
in the bcache api to the rest of lvm. This replaces the
file descriptor (fd) in the api. The rest of lvm uses
new functions bcache_set_fd(), bcache_clear_fd(), and
bcache_change_fd() to control which fd bcache uses for
io to a particular device.
. lvm opens a dev and gets an fd.
fd = open(dev);
. lvm passes fd to the bcache layer and gets a di
to use in the bcache api for the dev.
di = bcache_set_fd(fd);
. lvm uses bcache functions, passing di for the dev.
bcache_write_bytes(di, ...), etc.
. bcache translates di to fd to do io.
. lvm closes the device and clears the di/fd bcache state.
close(fd);
bcache_clear_fd(di);
In the bcache layer, a di-to-fd translation table
(int *_fd_table) is added. When bcache needs to
perform io on a di, it uses _fd_table[di].
In the following commit, lvm will make use of the new
bcache_change_fd() function to change the fd that
bcache uses for the dev, without dropping cached blocks.
2020-09-17 17:40:18 +03:00
/*
 * Optional write limit consulted by the issue functions.  While
 * _last_byte_offset is non-zero, DIR_WRITE requests on device index
 * _last_byte_di are shortened so they do not extend past that byte
 * offset (rounded up to a multiple of _last_byte_sector_size where that
 * fits in the request), and writes starting beyond it are rejected.
 * The code that sets these values is outside this view.
 */
static int _last_byte_di;
static uint64_t _last_byte_offset;
static int _last_byte_sector_size;
bcache: use indirection table for fd
Add a "device index" (di) for each device, and use this
in the bcache api to the rest of lvm. This replaces the
file descriptor (fd) in the api. The rest of lvm uses
new functions bcache_set_fd(), bcache_clear_fd(), and
bcache_change_fd() to control which fd bcache uses for
io to a particular device.
. lvm opens a dev and gets an fd.
fd = open(dev);
. lvm passes fd to the bcache layer and gets a di
to use in the bcache api for the dev.
di = bcache_set_fd(fd);
. lvm uses bcache functions, passing di for the dev.
bcache_write_bytes(di, ...), etc.
. bcache translates di to fd to do io.
. lvm closes the device and clears the di/fd bcache state.
close(fd);
bcache_clear_fd(di);
In the bcache layer, a di-to-fd translation table
(int *_fd_table) is added. When bcache needs to
perform io on a di, it uses _fd_table[di].
In the following commit, lvm will make use of the new
bcache_change_fd() function to change the fd that
bcache uses for the dev, without dropping cached blocks.
2020-09-17 17:40:18 +03:00
/*
 * Applies the last-byte limit to a write of *nbytes bytes at offset.
 *
 * If the write would run past _last_byte_offset its length is reduced
 * to stop there, then extended to the next multiple of
 * _last_byte_sector_size when that still fits in the original length
 * (partial sectors should not be written).
 *
 * Returns false when the write has to be rejected; otherwise *nbytes
 * holds the length to submit.
 */
static bool _async_limit_write(sector_t offset, sector_t *nbytes)
{
	sector_t orig_nbytes = *nbytes;
	sector_t limit_nbytes;
	sector_t extra_nbytes = 0;
	sector_t adjusted;

	if (offset > _last_byte_offset) {
		log_error(" Limit write at %llu len %llu beyond last byte %llu ",
			  (unsigned long long) offset,
			  (unsigned long long) orig_nbytes,
			  (unsigned long long) _last_byte_offset);
		return false;
	}

	/* The whole block lies within the limit: nothing to adjust. */
	if (offset + orig_nbytes <= _last_byte_offset)
		return true;

	limit_nbytes = _last_byte_offset - offset;

	if (limit_nbytes % _last_byte_sector_size) {
		extra_nbytes = _last_byte_sector_size - (limit_nbytes % _last_byte_sector_size);

		/*
		 * Rounding up should never exceed the bcache block size as
		 * long as that is a multiple of the sector size; if it does,
		 * skip the rounding.
		 */
		if (limit_nbytes + extra_nbytes > orig_nbytes) {
			log_warn(" Skip extending write at %llu len %llu limit %llu extra %llu sector_size %llu ",
				 (unsigned long long) offset,
				 (unsigned long long) orig_nbytes,
				 (unsigned long long) limit_nbytes,
				 (unsigned long long) extra_nbytes,
				 (unsigned long long) _last_byte_sector_size);
			extra_nbytes = 0;
		}
	}

	if (extra_nbytes) {
		log_debug(" Limit write at %llu len %llu to len %llu rounded to %llu ",
			  (unsigned long long) offset,
			  (unsigned long long) orig_nbytes,
			  (unsigned long long) limit_nbytes,
			  (unsigned long long) (limit_nbytes + extra_nbytes));
		adjusted = limit_nbytes + extra_nbytes;
	} else {
		log_debug(" Limit write at %llu len %llu to len %llu ",
			  (unsigned long long) offset,
			  (unsigned long long) orig_nbytes,
			  (unsigned long long) limit_nbytes);
		adjusted = limit_nbytes;
	}

	/* Should not happen: the adjusted length never exceeds the request. */
	if (adjusted > orig_nbytes) {
		log_error(" Invalid adjusted write at %llu len %llu adjusted %llu limit %llu extra %llu sector_size %llu ",
			  (unsigned long long) offset,
			  (unsigned long long) orig_nbytes,
			  (unsigned long long) adjusted,
			  (unsigned long long) limit_nbytes,
			  (unsigned long long) extra_nbytes,
			  (unsigned long long) _last_byte_sector_size);
		return false;
	}

	*nbytes = adjusted;
	return true;
}

/*
 * Submits one asynchronous read or write covering sectors [sb, se) of
 * the device with index di.
 *
 * data must be aligned according to the engine's page mask.  context is
 * stored in the control block and handed to the completion callback by
 * _async_wait().  Writes on the last-byte device are clamped first.
 *
 * Returns false if the buffer is misaligned, the write limit rejects
 * the request, no control block is free, or io_submit() fails.
 */
static bool _async_issue(struct io_engine *ioe, enum dir d, int di,
			 sector_t sb, sector_t se, void *data, void *context)
{
	struct async_engine *e = _to_async(ioe);
	struct control_block *cb;
	struct iocb *cb_array[1];
	sector_t offset;
	sector_t nbytes;
	int r;

	if (((uintptr_t) data) & e->page_mask) {
		log_warn(" misaligned data buffer ");
		return false;
	}

	offset = sb << SECTOR_SHIFT;
	nbytes = (se - sb) << SECTOR_SHIFT;

	/*
	 * If bcache block goes past where lvm wants to write, then clamp it.
	 */
	if ((d == DIR_WRITE) && _last_byte_offset && (di == _last_byte_di) &&
	    !_async_limit_write(offset, &nbytes))
		return false;

	cb = _cb_alloc(e->cbs, context);
	if (!cb) {
		log_warn(" couldn't allocate control block ");
		return false;
	}

	memset(&cb->cb, 0, sizeof(cb->cb));

	cb->cb.aio_fildes = (int) _fd_table[di];
	cb->cb.u.c.buf = data;
	cb->cb.u.c.offset = offset;
	cb->cb.u.c.nbytes = nbytes;
	cb->cb.aio_lio_opcode = (d == DIR_READ) ? IO_CMD_PREAD : IO_CMD_PWRITE;

#if 0
	if (d == DIR_READ) {
		log_debug(" io R off %llu bytes %llu di %d fd %d ",
			  (unsigned long long) cb->cb.u.c.offset,
			  (unsigned long long) cb->cb.u.c.nbytes,
			  di, _fd_table[di]);
	} else {
		log_debug(" io W off %llu bytes %llu di %d fd %d ",
			  (unsigned long long) cb->cb.u.c.offset,
			  (unsigned long long) cb->cb.u.c.nbytes,
			  di, _fd_table[di]);
	}
#endif

	cb_array[0] = &cb->cb;

	/* Resubmit for as long as io_submit() reports -EAGAIN. */
	do {
		r = io_submit(e->aio_context, 1, cb_array);
	} while (r == -EAGAIN);

	if (r < 0) {
		_cb_free(e->cbs, cb);
		return false;
	}

	return true;
}
2018-08-24 22:46:51 +03:00
/*
 * MAX_IO is returned to the layer above via bcache_max_prefetches() which
 * tells the caller how many devices to submit io for concurrently.  There will
 * be an open file descriptor for each of these, so keep it low enough to avoid
 * reaching the default max open file limit (1024) when there are over 1024
 * devices being scanned.
 *
 * Within this file it is also the value passed to io_setup() and the number
 * of control blocks created for the async engine.
 */
# define MAX_IO 256
2018-02-02 15:06:14 +03:00
/* Maximum number of completion events fetched by one io_getevents() call in _async_wait(). */
# define MAX_EVENT 64
2018-01-30 13:46:08 +03:00
2018-02-05 19:04:23 +03:00
/*
 * Waits for at least one submitted request to complete (up to MAX_EVENT
 * are collected per call) and invokes fn for each with the request's
 * context and a status:
 *   0         the full length was transferred, or at least one sector was
 *   negative  the error reported by the kernel for that request
 *   -ENODATA  a transfer shorter than one sector
 * The control block of every completed request is returned to the set.
 *
 * Returns false if io_getevents() itself fails.
 */
static bool _async_wait(struct io_engine *ioe, io_complete_fn fn)
{
	int i, r;
	struct io_event event[MAX_EVENT];
	struct control_block *cb;
	struct async_engine *e = _to_async(ioe);

	memset(&event, 0, sizeof(event));
	r = io_getevents(e->aio_context, 1, MAX_EVENT, event, NULL);

	if (r < 0) {
		/*
		 * The libaio wrapper returns a negated error number and does
		 * not set errno, while log_sys_warn() reports strerror(errno).
		 * Publish the real error before logging.
		 */
		errno = -r;
		log_sys_warn(" io_getevents ");
		return false;
	}

	for (i = 0; i < r; i++) {
		struct io_event *ev = event + i;
		int status;

		cb = _iocb_to_cb((struct iocb *) ev->obj);

		if (ev->res == cb->cb.u.c.nbytes)
			status = 0;
		else if ((int) ev->res < 0)
			status = (int) ev->res;
		// FIXME: dct added this. a short read is ok?!
		else if (ev->res >= (1 << SECTOR_SHIFT))
			status = 0;	/* minimum acceptable read is 1 sector */
		else
			status = -ENODATA;

		fn(cb->context, status);
		_cb_free(e->cbs, cb);
	}

	return true;
}
2018-02-20 18:33:27 +03:00
static unsigned _async_max_io ( struct io_engine * e )
2018-02-08 19:10:31 +03:00
{
2018-02-20 18:33:27 +03:00
return MAX_IO ;
2018-02-08 19:10:31 +03:00
}
2018-02-20 18:33:27 +03:00
struct io_engine * create_async_io_engine ( void )
2018-02-05 19:04:23 +03:00
{
2021-09-19 21:23:24 +03:00
static int _pagesize = 0 ;
2018-02-05 19:04:23 +03:00
int r ;
2021-09-19 21:23:24 +03:00
struct async_engine * e ;
2018-02-05 19:04:23 +03:00
2021-09-19 21:23:24 +03:00
if ( ( _pagesize < = 0 ) & & ( _pagesize = sysconf ( _SC_PAGESIZE ) ) < 0 ) {
log_warn ( " _SC_PAGESIZE returns negative value. " ) ;
return NULL ;
}
if ( ! ( e = malloc ( sizeof ( * e ) ) ) )
2018-02-05 19:04:23 +03:00
return NULL ;
e - > e . destroy = _async_destroy ;
e - > e . issue = _async_issue ;
e - > e . wait = _async_wait ;
2018-02-08 19:10:31 +03:00
e - > e . max_io = _async_max_io ;
2018-02-05 19:04:23 +03:00
e - > aio_context = 0 ;
2018-02-20 18:33:27 +03:00
r = io_setup ( MAX_IO , & e - > aio_context ) ;
2018-02-05 19:04:23 +03:00
if ( r < 0 ) {
2018-11-16 22:09:29 +03:00
log_debug ( " io_setup failed %d " , r ) ;
2018-06-08 15:40:53 +03:00
free ( e ) ;
2018-02-05 19:04:23 +03:00
return NULL ;
}
2018-02-20 18:33:27 +03:00
e - > cbs = _cb_set_create ( MAX_IO ) ;
2018-02-05 19:04:23 +03:00
if ( ! e - > cbs ) {
log_warn ( " couldn't create control block set " ) ;
2018-06-08 15:40:53 +03:00
free ( e ) ;
2018-02-05 19:04:23 +03:00
return NULL ;
}
2021-09-19 21:23:24 +03:00
e - > page_mask = ( unsigned ) _pagesize - 1 ;
2018-05-17 12:05:10 +03:00
2018-02-05 19:04:23 +03:00
return & e - > e ;
}
2018-01-30 13:46:08 +03:00
//----------------------------------------------------------------
2018-05-10 16:29:26 +03:00
/* Record of one io already performed by _sync_issue(), kept until _sync_wait() reports it. */
struct sync_io {
/* Links the record into sync_engine.complete. */
struct dm_list list ;
/* Caller-supplied value passed to the completion callback. */
void * context ;
} ;
/* io_engine implementation that performs each io synchronously inside issue(). */
struct sync_engine {
/* Generic engine interface; _to_sync() maps back from it. */
struct io_engine e ;
/* sync_io records queued by _sync_issue() and drained by _sync_wait(). */
struct dm_list complete ;
} ;
/* Converts the generic engine pointer back to the sync_engine that embeds it. */
static struct sync_engine *_to_sync(struct io_engine *e)
{
	struct sync_engine *engine = container_of(e, struct sync_engine, e);

	return engine;
}
/* Frees the sync engine that embeds ioe. */
static void _sync_destroy(struct io_engine *ioe)
{
	free(_to_sync(ioe));
}
bcache: use indirection table for fd
Add a "device index" (di) for each device, and use this
in the bcache api to the rest of lvm. This replaces the
file descriptor (fd) in the api. The rest of lvm uses
new functions bcache_set_fd(), bcache_clear_fd(), and
bcache_change_fd() to control which fd bcache uses for
io to a particular device.
. lvm opens a dev and gets an fd.
fd = open(dev);
. lvm passes fd to the bcache layer and gets a di
to use in the bcache api for the dev.
di = bcache_set_fd(fd);
. lvm uses bcache functions, passing di for the dev.
bcache_write_bytes(di, ...), etc.
. bcache translates di to fd to do io.
. lvm closes the device and clears the di/fd bcache state.
close(fd);
bcache_clear_fd(di);
In the bcache layer, a di-to-fd translation table
(int *_fd_table) is added. When bcache needs to
perform io on a di, it uses _fd_table[di].
In the following commit, lvm will make use of the new
bcache_change_fd() function to change the fd that
bcache uses for the dev, without dropping cached blocks.
2020-09-17 17:40:18 +03:00
static bool _sync_issue ( struct io_engine * ioe , enum dir d , int di ,
2018-05-10 16:29:26 +03:00
sector_t sb , sector_t se , void * data , void * context )
{
2018-11-16 22:09:29 +03:00
int rv ;
off_t off ;
uint64_t where ;
uint64_t pos = 0 ;
uint64_t len = ( se - sb ) * 512 ;
2018-05-10 16:29:26 +03:00
struct sync_engine * e = _to_sync ( ioe ) ;
struct sync_io * io = malloc ( sizeof ( * io ) ) ;
2018-05-11 07:37:47 +03:00
if ( ! io ) {
log_warn ( " unable to allocate sync_io " ) ;
2018-05-10 16:29:26 +03:00
return false ;
2018-05-11 07:37:47 +03:00
}
2018-05-10 16:29:26 +03:00
2018-05-11 07:37:47 +03:00
where = sb * 512 ;
bcache: use indirection table for fd
Add a "device index" (di) for each device, and use this
in the bcache api to the rest of lvm. This replaces the
file descriptor (fd) in the api. The rest of lvm uses
new functions bcache_set_fd(), bcache_clear_fd(), and
bcache_change_fd() to control which fd bcache uses for
io to a particular device.
. lvm opens a dev and gets and fd.
fd = open(dev);
. lvm passes fd to the bcache layer and gets a di
to use in the bcache api for the dev.
di = bcache_set_fd(fd);
. lvm uses bcache functions, passing di for the dev.
bcache_write_bytes(di, ...), etc.
. bcache translates di to fd to do io.
. lvm closes the device and clears the di/fd bcache state.
close(fd);
bcache_clear_fd(di);
In the bcache layer, a di-to-fd translation table
(int *_fd_table) is added. When bcache needs to
perform io on a di, it uses _fd_table[di].
In the following commit, lvm will make use of the new
bcache_change_fd() function to change the fd that
bcache uses for the dev, without dropping cached blocks.
2020-09-17 17:40:18 +03:00
off = lseek ( _fd_table [ di ] , where , SEEK_SET ) ;
2018-11-16 22:09:29 +03:00
if ( off = = ( off_t ) - 1 ) {
log_warn ( " Device seek error %d for offset %llu " , errno , ( unsigned long long ) where ) ;
2018-10-15 15:29:52 +03:00
free ( io ) ;
2018-11-16 22:09:29 +03:00
return false ;
}
if ( off ! = ( off_t ) where ) {
log_warn ( " Device seek failed for offset %llu " , ( unsigned long long ) where ) ;
free ( io ) ;
return false ;
2018-05-11 07:37:47 +03:00
}
2018-05-10 16:29:26 +03:00
2018-11-16 21:21:20 +03:00
/*
* If bcache block goes past where lvm wants to write , then clamp it .
*/
bcache: use indirection table for fd
Add a "device index" (di) for each device, and use this
in the bcache api to the rest of lvm. This replaces the
file descriptor (fd) in the api. The rest of lvm uses
new functions bcache_set_fd(), bcache_clear_fd(), and
bcache_change_fd() to control which fd bcache uses for
io to a particular device.
. lvm opens a dev and gets and fd.
fd = open(dev);
. lvm passes fd to the bcache layer and gets a di
to use in the bcache api for the dev.
di = bcache_set_fd(fd);
. lvm uses bcache functions, passing di for the dev.
bcache_write_bytes(di, ...), etc.
. bcache translates di to fd to do io.
. lvm closes the device and clears the di/fd bcache state.
close(fd);
bcache_clear_fd(di);
In the bcache layer, a di-to-fd translation table
(int *_fd_table) is added. When bcache needs to
perform io on a di, it uses _fd_table[di].
In the following commit, lvm will make use of the new
bcache_change_fd() function to change the fd that
bcache uses for the dev, without dropping cached blocks.
2020-09-17 17:40:18 +03:00
if ( ( d = = DIR_WRITE ) & & _last_byte_offset & & ( di = = _last_byte_di ) ) {
2018-11-16 21:21:20 +03:00
uint64_t offset = where ;
uint64_t nbytes = len ;
sector_t limit_nbytes = 0 ;
sector_t extra_nbytes = 0 ;
2019-07-26 22:21:08 +03:00
sector_t orig_nbytes = 0 ;
2018-11-16 21:21:20 +03:00
if ( offset > _last_byte_offset ) {
log_error ( " Limit write at %llu len %llu beyond last byte %llu " ,
( unsigned long long ) offset ,
( unsigned long long ) nbytes ,
( unsigned long long ) _last_byte_offset ) ;
2018-12-21 23:01:18 +03:00
free ( io ) ;
2018-11-16 21:21:20 +03:00
return false ;
}
if ( offset + nbytes > _last_byte_offset ) {
limit_nbytes = _last_byte_offset - offset ;
2019-07-26 22:21:08 +03:00
if ( limit_nbytes % _last_byte_sector_size ) {
2018-11-16 21:21:20 +03:00
extra_nbytes = _last_byte_sector_size - ( limit_nbytes % _last_byte_sector_size ) ;
2019-07-26 22:21:08 +03:00
/*
* adding extra_nbytes to the reduced nbytes ( limit_nbytes )
* should make the final write size a multiple of the
* sector size . This should never result in a final size
* larger than the bcache block size ( as long as the bcache
* block size is a multiple of the sector size ) .
*/
if ( limit_nbytes + extra_nbytes > nbytes ) {
log_warn ( " Skip extending write at %llu len %llu limit %llu extra %llu sector_size %llu " ,
( unsigned long long ) offset ,
( unsigned long long ) nbytes ,
( unsigned long long ) limit_nbytes ,
( unsigned long long ) extra_nbytes ,
( unsigned long long ) _last_byte_sector_size ) ;
extra_nbytes = 0 ;
}
}
orig_nbytes = nbytes ;
2018-11-16 21:21:20 +03:00
if ( extra_nbytes ) {
log_debug ( " Limit write at %llu len %llu to len %llu rounded to %llu " ,
( unsigned long long ) offset ,
( unsigned long long ) nbytes ,
( unsigned long long ) limit_nbytes ,
( unsigned long long ) ( limit_nbytes + extra_nbytes ) ) ;
nbytes = limit_nbytes + extra_nbytes ;
} else {
log_debug ( " Limit write at %llu len %llu to len %llu " ,
( unsigned long long ) offset ,
( unsigned long long ) nbytes ,
( unsigned long long ) limit_nbytes ) ;
nbytes = limit_nbytes ;
}
2019-07-26 22:21:08 +03:00
/*
* This shouldn ' t happen , the reduced + extended
* nbytes value should never be larger than the
* bcache block size .
*/
if ( nbytes > orig_nbytes ) {
log_error ( " Invalid adjusted write at %llu len %llu adjusted %llu limit %llu extra %llu sector_size %llu " ,
( unsigned long long ) offset ,
( unsigned long long ) orig_nbytes ,
( unsigned long long ) nbytes ,
( unsigned long long ) limit_nbytes ,
( unsigned long long ) extra_nbytes ,
( unsigned long long ) _last_byte_sector_size ) ;
2020-01-29 19:31:17 +03:00
free ( io ) ;
2019-07-26 22:21:08 +03:00
return false ;
}
2018-11-16 21:21:20 +03:00
}
where = offset ;
len = nbytes ;
}
2018-11-16 22:09:29 +03:00
while ( pos < len ) {
if ( d = = DIR_READ )
bcache: use indirection table for fd
Add a "device index" (di) for each device, and use this
in the bcache api to the rest of lvm. This replaces the
file descriptor (fd) in the api. The rest of lvm uses
new functions bcache_set_fd(), bcache_clear_fd(), and
bcache_change_fd() to control which fd bcache uses for
io to a particular device.
. lvm opens a dev and gets and fd.
fd = open(dev);
. lvm passes fd to the bcache layer and gets a di
to use in the bcache api for the dev.
di = bcache_set_fd(fd);
. lvm uses bcache functions, passing di for the dev.
bcache_write_bytes(di, ...), etc.
. bcache translates di to fd to do io.
. lvm closes the device and clears the di/fd bcache state.
close(fd);
bcache_clear_fd(di);
In the bcache layer, a di-to-fd translation table
(int *_fd_table) is added. When bcache needs to
perform io on a di, it uses _fd_table[di].
In the following commit, lvm will make use of the new
bcache_change_fd() function to change the fd that
bcache uses for the dev, without dropping cached blocks.
2020-09-17 17:40:18 +03:00
rv = read ( _fd_table [ di ] , ( char * ) data + pos , len - pos ) ;
2018-11-16 22:09:29 +03:00
else
bcache: use indirection table for fd
Add a "device index" (di) for each device, and use this
in the bcache api to the rest of lvm. This replaces the
file descriptor (fd) in the api. The rest of lvm uses
new functions bcache_set_fd(), bcache_clear_fd(), and
bcache_change_fd() to control which fd bcache uses for
io to a particular device.
. lvm opens a dev and gets and fd.
fd = open(dev);
. lvm passes fd to the bcache layer and gets a di
to use in the bcache api for the dev.
di = bcache_set_fd(fd);
. lvm uses bcache functions, passing di for the dev.
bcache_write_bytes(di, ...), etc.
. bcache translates di to fd to do io.
. lvm closes the device and clears the di/fd bcache state.
close(fd);
bcache_clear_fd(di);
In the bcache layer, a di-to-fd translation table
(int *_fd_table) is added. When bcache needs to
perform io on a di, it uses _fd_table[di].
In the following commit, lvm will make use of the new
bcache_change_fd() function to change the fd that
bcache uses for the dev, without dropping cached blocks.
2020-09-17 17:40:18 +03:00
rv = write ( _fd_table [ di ] , ( char * ) data + pos , len - pos ) ;
2018-11-16 22:09:29 +03:00
if ( rv = = - 1 & & errno = = EINTR )
continue ;
if ( rv = = - 1 & & errno = = EAGAIN )
continue ;
if ( ! rv )
break ;
if ( rv < 0 ) {
if ( d = = DIR_READ )
log_debug ( " Device read error %d offset %llu len %llu " , errno ,
( unsigned long long ) ( where + pos ) ,
( unsigned long long ) ( len - pos ) ) ;
else
log_debug ( " Device write error %d offset %llu len %llu " , errno ,
( unsigned long long ) ( where + pos ) ,
( unsigned long long ) ( len - pos ) ) ;
2018-10-15 15:29:52 +03:00
free ( io ) ;
2018-11-16 22:09:29 +03:00
return false ;
}
pos + = rv ;
2018-05-11 07:37:47 +03:00
}
2018-11-16 22:09:29 +03:00
if ( pos < len ) {
if ( d = = DIR_READ )
log_warn ( " Device read short %u bytes remaining " , ( unsigned ) ( len - pos ) ) ;
else
log_warn ( " Device write short %u bytes remaining " , ( unsigned ) ( len - pos ) ) ;
/*
free ( io ) ;
2018-05-11 07:37:47 +03:00
return false ;
2018-11-16 22:09:29 +03:00
*/
2018-05-11 07:37:47 +03:00
}
2018-05-10 16:29:26 +03:00
dm_list_add ( & e - > complete , & io - > list ) ;
io - > context = context ;
return true ;
}
/*
 * Hand every io queued on the engine's 'complete' list back to the caller.
 *
 * Each queued io has its context passed to 'fn' with an error code of 0,
 * and is then unlinked from the list and freed.
 *
 * Always returns true.
 */
static bool _sync_wait(struct io_engine *ioe, io_complete_fn fn)
{
	struct sync_engine *engine = _to_sync(ioe);
	struct sync_io *cur, *next;

	/* The safe iterator is required because 'cur' is freed in the body. */
	dm_list_iterate_items_safe(cur, next, &engine->complete) {
		fn(cur->context, 0);
		dm_list_del(&cur->list);
		free(cur);
	}

	return true;
}
/*
 * The synchronous engine reports a limit of a single io.
 * The engine argument is not consulted.
 */
static unsigned _sync_max_io(struct io_engine *e)
{
	(void) e;

	return 1;
}
struct io_engine * create_sync_io_engine ( void )
{
2018-06-08 15:40:53 +03:00
struct sync_engine * e = malloc ( sizeof ( * e ) ) ;
2018-05-10 16:29:26 +03:00
if ( ! e )
return NULL ;
e - > e . destroy = _sync_destroy ;
e - > e . issue = _sync_issue ;
e - > e . wait = _sync_wait ;
e - > e . max_io = _sync_max_io ;
dm_list_init ( & e - > complete ) ;
return & e - > e ;
}
//----------------------------------------------------------------
2018-01-30 13:46:08 +03:00
/* NOTE(review): MIN_BLOCKS is not referenced in this part of the file - confirm it is still used. */
# define MIN_BLOCKS 16
/*
 * Percentages of nr_cache_blocks used by _preemptive_writeback(): when the
 * number of available blocks falls below the low mark, enough writes are
 * requested to bring it back up to the high mark.
 */
# define WRITEBACK_LOW_THRESHOLD_PERCENT 33
# define WRITEBACK_HIGH_THRESHOLD_PERCENT 66
//----------------------------------------------------------------
/*
 * Allocate 'len' bytes aligned to 'alignment' using posix_memalign().
 *
 * Returns the allocation (release it with free()), or NULL if
 * posix_memalign() reports any error.
 */
static void *_alloc_aligned(size_t len, size_t alignment)
{
	void *mem = NULL;

	return posix_memalign(&mem, alignment, len) ? NULL : mem;
}
//----------------------------------------------------------------
static bool _test_flags ( struct block * b , unsigned bits )
{
return ( b - > flags & bits ) ! = 0 ;
}
static void _set_flags ( struct block * b , unsigned bits )
{
b - > flags | = bits ;
}
static void _clear_flags ( struct block * b , unsigned bits )
{
b - > flags & = ~ bits ;
}
//----------------------------------------------------------------
/* Per-block state bits kept in block->flags. */
enum block_flags {
	BF_IO_PENDING = 0x1,	/* io has been issued and has not completed yet */
	BF_DIRTY = 0x2,		/* block is linked on the dirty list rather than clean */
};
struct bcache {
sector_t block_sectors ;
uint64_t nr_data_blocks ;
uint64_t nr_cache_blocks ;
2018-02-02 15:06:14 +03:00
unsigned max_io ;
2018-01-30 13:46:08 +03:00
struct io_engine * engine ;
void * raw_data ;
struct block * raw_blocks ;
/*
* Lists that categorise the blocks .
*/
unsigned nr_locked ;
unsigned nr_dirty ;
unsigned nr_io_pending ;
struct dm_list free ;
struct dm_list errored ;
struct dm_list dirty ;
struct dm_list clean ;
struct dm_list io_pending ;
2018-05-30 16:17:26 +03:00
struct radix_tree * rtree ;
2018-01-30 13:46:08 +03:00
/*
* Statistics
*/
unsigned read_hits ;
unsigned read_misses ;
unsigned write_zeroes ;
unsigned write_hits ;
unsigned write_misses ;
unsigned prefetches ;
} ;
//----------------------------------------------------------------
2018-05-30 16:17:26 +03:00
/*
 * The two components that identify a cached block.  The struct is packed
 * so that it contains no padding and can be viewed as a flat byte string
 * (see union key).
 */
struct key_parts {
	uint32_t di;	/* device index */
	uint64_t b;	/* block index on that device */
} __attribute__ ((packed));
2018-01-30 13:46:08 +03:00
2018-05-30 16:17:26 +03:00
union key {
struct key_parts parts ;
uint8_t bytes [ 12 ] ;
} ;
2018-01-30 13:46:08 +03:00
bcache: use indirection table for fd
Add a "device index" (di) for each device, and use this
in the bcache api to the rest of lvm. This replaces the
file descriptor (fd) in the api. The rest of lvm uses
new functions bcache_set_fd(), bcache_clear_fd(), and
bcache_change_fd() to control which fd bcache uses for
io to a particular device.
. lvm opens a dev and gets an fd.
fd = open(dev);
. lvm passes fd to the bcache layer and gets a di
to use in the bcache api for the dev.
di = bcache_set_fd(fd);
. lvm uses bcache functions, passing di for the dev.
bcache_write_bytes(di, ...), etc.
. bcache translates di to fd to do io.
. lvm closes the device and clears the di/fd bcache state.
close(fd);
bcache_clear_fd(di);
In the bcache layer, a di-to-fd translation table
(int *_fd_table) is added. When bcache needs to
perform io on a di, it uses _fd_table[di].
In the following commit, lvm will make use of the new
bcache_change_fd() function to change the fd that
bcache uses for the dev, without dropping cached blocks.
2020-09-17 17:40:18 +03:00
static struct block * _block_lookup ( struct bcache * cache , int di , uint64_t i )
2018-01-30 13:46:08 +03:00
{
2018-05-30 16:17:26 +03:00
union key k ;
union radix_value v ;
2018-01-30 13:46:08 +03:00
bcache: use indirection table for fd
Add a "device index" (di) for each device, and use this
in the bcache api to the rest of lvm. This replaces the
file descriptor (fd) in the api. The rest of lvm uses
new functions bcache_set_fd(), bcache_clear_fd(), and
bcache_change_fd() to control which fd bcache uses for
io to a particular device.
. lvm opens a dev and gets and fd.
fd = open(dev);
. lvm passes fd to the bcache layer and gets a di
to use in the bcache api for the dev.
di = bcache_set_fd(fd);
. lvm uses bcache functions, passing di for the dev.
bcache_write_bytes(di, ...), etc.
. bcache translates di to fd to do io.
. lvm closes the device and clears the di/fd bcache state.
close(fd);
bcache_clear_fd(di);
In the bcache layer, a di-to-fd translation table
(int *_fd_table) is added. When bcache needs to
perform io on a di, it uses _fd_table[di].
In the following commit, lvm will make use of the new
bcache_change_fd() function to change the fd that
bcache uses for the dev, without dropping cached blocks.
2020-09-17 17:40:18 +03:00
k . parts . di = di ;
2018-05-30 16:17:26 +03:00
k . parts . b = i ;
2018-01-30 13:46:08 +03:00
2018-05-30 16:17:26 +03:00
if ( radix_tree_lookup ( cache - > rtree , k . bytes , k . bytes + sizeof ( k . bytes ) , & v ) )
return v . ptr ;
2018-01-30 13:46:08 +03:00
2018-05-30 16:17:26 +03:00
return NULL ;
2018-01-30 13:46:08 +03:00
}
2018-05-30 16:17:26 +03:00
static bool _block_insert ( struct block * b )
2018-01-30 13:46:08 +03:00
{
2018-05-30 16:17:26 +03:00
union key k ;
union radix_value v ;
2018-01-30 13:46:08 +03:00
bcache: use indirection table for fd
Add a "device index" (di) for each device, and use this
in the bcache api to the rest of lvm. This replaces the
file descriptor (fd) in the api. The rest of lvm uses
new functions bcache_set_fd(), bcache_clear_fd(), and
bcache_change_fd() to control which fd bcache uses for
io to a particular device.
. lvm opens a dev and gets and fd.
fd = open(dev);
. lvm passes fd to the bcache layer and gets a di
to use in the bcache api for the dev.
di = bcache_set_fd(fd);
. lvm uses bcache functions, passing di for the dev.
bcache_write_bytes(di, ...), etc.
. bcache translates di to fd to do io.
. lvm closes the device and clears the di/fd bcache state.
close(fd);
bcache_clear_fd(di);
In the bcache layer, a di-to-fd translation table
(int *_fd_table) is added. When bcache needs to
perform io on a di, it uses _fd_table[di].
In the following commit, lvm will make use of the new
bcache_change_fd() function to change the fd that
bcache uses for the dev, without dropping cached blocks.
2020-09-17 17:40:18 +03:00
k . parts . di = b - > di ;
2018-05-30 16:17:26 +03:00
k . parts . b = b - > index ;
v . ptr = b ;
2018-01-30 13:46:08 +03:00
2018-05-30 16:17:26 +03:00
return radix_tree_insert ( b - > cache - > rtree , k . bytes , k . bytes + sizeof ( k . bytes ) , v ) ;
2018-01-30 13:46:08 +03:00
}
2018-05-30 16:17:26 +03:00
static void _block_remove ( struct block * b )
2018-01-30 13:46:08 +03:00
{
2018-05-30 16:17:26 +03:00
union key k ;
2018-01-30 13:46:08 +03:00
bcache: use indirection table for fd
Add a "device index" (di) for each device, and use this
in the bcache api to the rest of lvm. This replaces the
file descriptor (fd) in the api. The rest of lvm uses
new functions bcache_set_fd(), bcache_clear_fd(), and
bcache_change_fd() to control which fd bcache uses for
io to a particular device.
. lvm opens a dev and gets and fd.
fd = open(dev);
. lvm passes fd to the bcache layer and gets a di
to use in the bcache api for the dev.
di = bcache_set_fd(fd);
. lvm uses bcache functions, passing di for the dev.
bcache_write_bytes(di, ...), etc.
. bcache translates di to fd to do io.
. lvm closes the device and clears the di/fd bcache state.
close(fd);
bcache_clear_fd(di);
In the bcache layer, a di-to-fd translation table
(int *_fd_table) is added. When bcache needs to
perform io on a di, it uses _fd_table[di].
In the following commit, lvm will make use of the new
bcache_change_fd() function to change the fd that
bcache uses for the dev, without dropping cached blocks.
2020-09-17 17:40:18 +03:00
k . parts . di = b - > di ;
2018-05-30 16:17:26 +03:00
k . parts . b = b - > index ;
2018-01-30 13:46:08 +03:00
2021-09-20 02:45:55 +03:00
( void ) radix_tree_remove ( b - > cache - > rtree , k . bytes , k . bytes + sizeof ( k . bytes ) ) ;
2018-01-30 13:46:08 +03:00
}
//----------------------------------------------------------------
2018-05-17 12:05:10 +03:00
static bool _init_free_list ( struct bcache * cache , unsigned count , unsigned pgsize )
2018-01-30 13:46:08 +03:00
{
unsigned i ;
size_t block_size = cache - > block_sectors < < SECTOR_SHIFT ;
unsigned char * data =
2018-05-16 23:19:03 +03:00
( unsigned char * ) _alloc_aligned ( count * block_size , pgsize ) ;
2018-01-30 13:46:08 +03:00
/* Allocate the data for each block. We page align the data. */
if ( ! data )
2018-01-30 15:13:48 +03:00
return false ;
2018-01-30 13:46:08 +03:00
2018-06-08 15:40:53 +03:00
cache - > raw_blocks = malloc ( count * sizeof ( * cache - > raw_blocks ) ) ;
2018-06-26 18:04:18 +03:00
if ( ! cache - > raw_blocks ) {
free ( data ) ;
return false ;
}
2018-01-30 13:46:08 +03:00
2018-06-26 18:04:18 +03:00
cache - > raw_data = data ;
2018-01-30 13:46:08 +03:00
for ( i = 0 ; i < count ; i + + ) {
struct block * b = cache - > raw_blocks + i ;
b - > cache = cache ;
b - > data = data + ( block_size * i ) ;
dm_list_add ( & cache - > free , & b - > list ) ;
}
2018-01-30 15:13:48 +03:00
return true ;
2018-01-30 13:46:08 +03:00
}
static void _exit_free_list ( struct bcache * cache )
{
2018-06-08 15:40:53 +03:00
free ( cache - > raw_data ) ;
free ( cache - > raw_blocks ) ;
2018-01-30 13:46:08 +03:00
}
static struct block * _alloc_block ( struct bcache * cache )
{
2018-02-02 17:34:45 +03:00
if ( dm_list_empty ( & cache - > free ) )
return NULL ;
return dm_list_struct_base ( _list_pop ( & cache - > free ) , struct block , list ) ;
2018-01-30 13:46:08 +03:00
}
2018-05-30 16:17:26 +03:00
static void _free_block ( struct block * b )
{
dm_list_add ( & b - > cache - > free , & b - > list ) ;
}
2018-01-30 13:46:08 +03:00
/*----------------------------------------------------------------
* Clean / dirty list management .
* Always use these methods to ensure nr_dirty is correct.
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static void _unlink_block ( struct block * b )
{
if ( _test_flags ( b , BF_DIRTY ) )
b - > cache - > nr_dirty - - ;
dm_list_del ( & b - > list ) ;
}
static void _link_block ( struct block * b )
{
struct bcache * cache = b - > cache ;
if ( _test_flags ( b , BF_DIRTY ) ) {
dm_list_add ( & cache - > dirty , & b - > list ) ;
cache - > nr_dirty + + ;
} else
dm_list_add ( & cache - > clean , & b - > list ) ;
}
/*
 * Unlink 'b' and link it again according to its current flags, which
 * re-adds it to the dirty or clean list.
 */
static void _relink(struct block *b)
{
	_unlink_block(b);
	_link_block(b);
}
/*----------------------------------------------------------------
* Low level IO handling
*
* We cannot have two concurrent writes on the same block .
* eg , background writeback , put with dirty , flush ?
*
* To avoid this we introduce some restrictions :
*
* i ) A held block can never be written back .
* ii ) You cannot get a block until writeback has completed .
*
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2018-02-01 17:52:43 +03:00
static void _complete_io ( void * context , int err )
{
struct block * b = context ;
struct bcache * cache = b - > cache ;
b - > error = err ;
_clear_flags ( b , BF_IO_PENDING ) ;
cache - > nr_io_pending - - ;
/*
* b is on the io_pending list , so we don ' t want to use unlink_block .
* Which would incorrectly adjust nr_dirty .
*/
dm_list_del ( & b - > list ) ;
2018-03-01 19:17:32 +03:00
if ( b - > error ) {
2018-04-26 15:13:27 +03:00
dm_list_add ( & cache - > errored , & b - > list ) ;
2018-02-20 18:33:27 +03:00
} else {
2018-02-01 17:52:43 +03:00
_clear_flags ( b , BF_DIRTY ) ;
_link_block ( b ) ;
}
}
2018-01-30 13:46:08 +03:00
/*
* | b - > list | should be valid ( either pointing to itself , on one of the other
* lists .
*/
2018-02-20 18:33:27 +03:00
static void _issue_low_level ( struct block * b , enum dir d )
2018-01-30 13:46:08 +03:00
{
struct bcache * cache = b - > cache ;
sector_t sb = b - > index * cache - > block_sectors ;
sector_t se = sb + cache - > block_sectors ;
if ( _test_flags ( b , BF_IO_PENDING ) )
2018-02-20 18:33:27 +03:00
return ;
2018-01-30 13:46:08 +03:00
2018-02-20 18:33:27 +03:00
b - > io_dir = d ;
2018-01-30 13:46:08 +03:00
_set_flags ( b , BF_IO_PENDING ) ;
2018-05-16 12:09:17 +03:00
cache - > nr_io_pending + + ;
2018-04-27 12:56:13 +03:00
dm_list_move ( & cache - > io_pending , & b - > list ) ;
2018-02-05 19:04:23 +03:00
bcache: use indirection table for fd
Add a "device index" (di) for each device, and use this
in the bcache api to the rest of lvm. This replaces the
file descriptor (fd) in the api. The rest of lvm uses
new functions bcache_set_fd(), bcache_clear_fd(), and
bcache_change_fd() to control which fd bcache uses for
io to a particular device.
. lvm opens a dev and gets and fd.
fd = open(dev);
. lvm passes fd to the bcache layer and gets a di
to use in the bcache api for the dev.
di = bcache_set_fd(fd);
. lvm uses bcache functions, passing di for the dev.
bcache_write_bytes(di, ...), etc.
. bcache translates di to fd to do io.
. lvm closes the device and clears the di/fd bcache state.
close(fd);
bcache_clear_fd(di);
In the bcache layer, a di-to-fd translation table
(int *_fd_table) is added. When bcache needs to
perform io on a di, it uses _fd_table[di].
In the following commit, lvm will make use of the new
bcache_change_fd() function to change the fd that
bcache uses for the dev, without dropping cached blocks.
2020-09-17 17:40:18 +03:00
if ( ! cache - > engine - > issue ( cache - > engine , d , b - > di , sb , se , b - > data , b ) ) {
2019-10-29 18:14:07 +03:00
/* FIXME: if io_submit() set an errno, return that instead of EIO? */
_complete_io ( b , - EIO ) ;
2018-02-20 18:33:27 +03:00
return ;
2018-02-01 17:52:43 +03:00
}
2018-01-30 13:46:08 +03:00
}
2018-02-20 18:33:27 +03:00
static inline void _issue_read ( struct block * b )
2018-01-30 13:46:08 +03:00
{
2018-02-20 18:33:27 +03:00
_issue_low_level ( b , DIR_READ ) ;
2018-01-30 13:46:08 +03:00
}
2018-02-20 18:33:27 +03:00
static inline void _issue_write ( struct block * b )
2018-01-30 13:46:08 +03:00
{
2018-02-20 18:33:27 +03:00
_issue_low_level ( b , DIR_WRITE ) ;
2018-01-30 13:46:08 +03:00
}
2018-01-30 15:13:48 +03:00
static bool _wait_io ( struct bcache * cache )
2018-01-30 13:46:08 +03:00
{
2018-02-05 19:04:23 +03:00
return cache - > engine - > wait ( cache - > engine , _complete_io ) ;
2018-01-30 13:46:08 +03:00
}
/*----------------------------------------------------------------
* High level IO handling
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static void _wait_all ( struct bcache * cache )
{
while ( ! dm_list_empty ( & cache - > io_pending ) )
_wait_io ( cache ) ;
}
static void _wait_specific ( struct block * b )
{
while ( _test_flags ( b , BF_IO_PENDING ) )
_wait_io ( b - > cache ) ;
}
static unsigned _writeback ( struct bcache * cache , unsigned count )
{
unsigned actual = 0 ;
struct block * b , * tmp ;
dm_list_iterate_items_gen_safe ( b , tmp , & cache - > dirty , list ) {
if ( actual = = count )
break ;
// We can't writeback anything that's still in use.
if ( ! b - > ref_count ) {
_issue_write ( b ) ;
actual + + ;
}
}
return actual ;
}
/*----------------------------------------------------------------
* High level allocation
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static struct block * _find_unused_clean_block ( struct bcache * cache )
{
struct block * b ;
dm_list_iterate_items ( b , & cache - > clean ) {
if ( ! b - > ref_count ) {
_unlink_block ( b ) ;
2018-05-30 16:17:26 +03:00
_block_remove ( b ) ;
2018-01-30 13:46:08 +03:00
return b ;
}
}
return NULL ;
}
bcache: use indirection table for fd
Add a "device index" (di) for each device, and use this
in the bcache api to the rest of lvm. This replaces the
file descriptor (fd) in the api. The rest of lvm uses
new functions bcache_set_fd(), bcache_clear_fd(), and
bcache_change_fd() to control which fd bcache uses for
io to a particular device.
. lvm opens a dev and gets an fd.
fd = open(dev);
. lvm passes fd to the bcache layer and gets a di
to use in the bcache api for the dev.
di = bcache_set_fd(fd);
. lvm uses bcache functions, passing di for the dev.
bcache_write_bytes(di, ...), etc.
. bcache translates di to fd to do io.
. lvm closes the device and clears the di/fd bcache state.
close(fd);
bcache_clear_fd(di);
In the bcache layer, a di-to-fd translation table
(int *_fd_table) is added. When bcache needs to
perform io on a di, it uses _fd_table[di].
In the following commit, lvm will make use of the new
bcache_change_fd() function to change the fd that
bcache uses for the dev, without dropping cached blocks.
2020-09-17 17:40:18 +03:00
/*
 * Obtain a block to hold block index 'i' of device index 'di'.
 *
 * A block is taken from the free list if possible, otherwise an unused
 * clean block is recycled.  If neither is available and 'can_wait' is set,
 * dirty blocks are written back and outstanding io is waited for before
 * trying again.
 *
 * Returns the initialised block (registered in the radix tree, not linked
 * on any list, ref_count 0), or NULL if:
 *   - no block is available and 'can_wait' is false,
 *   - the errored list has reached cache->max_io entries,
 *   - waiting cannot make progress (no io in flight and nothing that can
 *     be written back, e.g. every block is held), or
 *   - the radix tree insert fails.
 */
static struct block *_new_block(struct bcache *cache, int di, block_address i, bool can_wait)
{
	struct block *b;
	unsigned issued;
	bool io_in_flight;

	b = _alloc_block(cache);

	while (!b) {
		b = _find_unused_clean_block(cache);
		if (b)
			break;

		if (!can_wait) {
			log_debug(" bcache no new blocks for di %d index %u ",
				  di, (uint32_t) i);
			return NULL;
		}

		/* Only start new writeback when nothing is already in flight. */
		io_in_flight = !dm_list_empty(&cache->io_pending);
		issued = io_in_flight ? 0 : _writeback(cache, 16); // FIXME: magic number

		_wait_all(cache);

		if (dm_list_size(&cache->errored) >= cache->max_io) {
			log_debug(" bcache no new blocks for di %d index %u with >%d errors. ",
				  di, (uint32_t) i, cache->max_io);
			return NULL;
		}

		/*
		 * Nothing was in flight and nothing could be written back, so
		 * the cache state is unchanged and another pass would find
		 * exactly the same thing.  Give up rather than spin forever.
		 */
		if (!io_in_flight && !issued) {
			log_debug(" bcache no new blocks for di %d index %u ",
				  di, (uint32_t) i);
			return NULL;
		}
	}

	dm_list_init(&b->list);
	b->flags = 0;
	b->di = di;
	b->index = i;
	b->ref_count = 0;
	b->error = 0;

	if (!_block_insert(b)) {
		log_error(" bcache unable to insert block in radix tree (OOM?) ");
		_free_block(b);
		return NULL;
	}

	return b;
}
/*----------------------------------------------------------------
* Block reference counting
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
static void _zero_block ( struct block * b )
{
b - > cache - > write_zeroes + + ;
memset ( b - > data , 0 , b - > cache - > block_sectors < < SECTOR_SHIFT ) ;
_set_flags ( b , BF_DIRTY ) ;
}
static void _hit ( struct block * b , unsigned flags )
{
struct bcache * cache = b - > cache ;
if ( flags & ( GF_ZERO | GF_DIRTY ) )
cache - > write_hits + + ;
else
cache - > read_hits + + ;
_relink ( b ) ;
}
static void _miss ( struct bcache * cache , unsigned flags )
{
if ( flags & ( GF_ZERO | GF_DIRTY ) )
cache - > write_misses + + ;
else
cache - > read_misses + + ;
}
/*
 * Return the block for block index 'i' of device index 'di', reading it
 * from the device (or zeroing it, for GF_ZERO) if it is not cached.
 *
 * The returned block is linked on the dirty list if GF_DIRTY or GF_ZERO
 * was requested (or it was already dirty), otherwise on the clean list.
 *
 * Returns NULL if a write-type request hits a block that is already
 * referenced, or if no block could be obtained.
 */
static struct block *_lookup_or_read_block(struct bcache *cache,
					   int di, block_address i,
					   unsigned flags)
{
	const bool for_write = (flags & (GF_DIRTY | GF_ZERO)) != 0;
	struct block *b = _block_lookup(cache, di, i);

	if (b) {
		// FIXME: this is insufficient.  We need to also catch a read
		// lock of a write locked block.  Ref count needs to distinguish.
		if (b->ref_count && for_write) {
			log_warn(" concurrent write lock attempted ");
			return NULL;
		}

		/* A block with io still in flight is accounted as a miss. */
		if (_test_flags(b, BF_IO_PENDING)) {
			_miss(cache, flags);
			_wait_specific(b);
		} else
			_hit(b, flags);

		_unlink_block(b);

		if (flags & GF_ZERO)
			_zero_block(b);
	} else {
		_miss(cache, flags);

		b = _new_block(cache, di, i, true);
		if (!b)
			return NULL;

		if (flags & GF_ZERO)
			_zero_block(b);
		else {
			_issue_read(b);
			_wait_specific(b);

			/*
			 * NOTE(review): the original comment here says the block
			 * is known to be clean and unerrored, but a failed read
			 * leaves b on the errored list with b->error set.  It is
			 * unlinked and relinked below either way - confirm that
			 * callers check b->error.
			 */
			_unlink_block(b);
		}
	}

	if (for_write)
		_set_flags(b, BF_DIRTY);

	_link_block(b);

	return b;
}
static void _preemptive_writeback ( struct bcache * cache )
{
// FIXME: this ignores those blocks that are in the error state. Track
// nr_clean instead?
unsigned nr_available = cache - > nr_cache_blocks - ( cache - > nr_dirty - cache - > nr_io_pending ) ;
if ( nr_available < ( WRITEBACK_LOW_THRESHOLD_PERCENT * cache - > nr_cache_blocks / 100 ) )
_writeback ( cache , ( WRITEBACK_HIGH_THRESHOLD_PERCENT * cache - > nr_cache_blocks / 100 ) - nr_available ) ;
}
/*----------------------------------------------------------------
* Public interface
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2018-02-05 19:04:23 +03:00
/*
 * Create a block cache.
 *
 * block_sectors   - size of each cache block in sectors; must be non-zero
 *                   and a multiple of the page size.
 * nr_cache_blocks - number of blocks in the cache; must be non-zero.
 * engine          - io engine used for all io.
 *
 * Also allocates the di-to-fd translation table (_fd_table), with every
 * slot initialised to -1.
 *
 * Returns the new cache, or NULL on failure.  If the arguments are
 * rejected, or the cache structure itself cannot be allocated, the engine
 * is left untouched; on any later failure the engine is destroyed along
 * with everything allocated so far.
 */
struct bcache *bcache_create(sector_t block_sectors, unsigned nr_cache_blocks,
			     struct io_engine *engine)
{
	static long _pagesize = 0;
	struct bcache *cache;
	unsigned max_io = engine->max_io(engine);
	int i;

	/* The page size is looked up once and remembered across calls. */
	if ((_pagesize <= 0) && ((_pagesize = sysconf(_SC_PAGESIZE)) < 0)) {
		log_warn(" WARNING: _SC_PAGESIZE returns negative value. ");
		return NULL;
	}

	if (!nr_cache_blocks) {
		log_warn(" bcache must have at least one cache block ");
		return NULL;
	}

	if (!block_sectors) {
		log_warn(" bcache must have a non zero block size ");
		return NULL;
	}

	if (block_sectors & ((_pagesize >> SECTOR_SHIFT) - 1)) {
		log_warn(" bcache block size must be a multiple of page size ");
		return NULL;
	}

	cache = malloc(sizeof(*cache));
	if (!cache)
		return NULL;

	cache->block_sectors = block_sectors;
	cache->nr_cache_blocks = nr_cache_blocks;
	cache->max_io = nr_cache_blocks < max_io ? nr_cache_blocks : max_io;
	cache->engine = engine;
	cache->nr_locked = 0;
	cache->nr_dirty = 0;
	cache->nr_io_pending = 0;

	dm_list_init(&cache->free);
	dm_list_init(&cache->errored);
	dm_list_init(&cache->dirty);
	dm_list_init(&cache->clean);
	dm_list_init(&cache->io_pending);

	cache->read_hits = 0;
	cache->read_misses = 0;
	cache->write_zeroes = 0;
	cache->write_hits = 0;
	cache->write_misses = 0;
	cache->prefetches = 0;

	cache->rtree = radix_tree_create(NULL, NULL);
	if (!cache->rtree)
		goto bad_rtree;

	if (!_init_free_list(cache, nr_cache_blocks, _pagesize))
		goto bad_free_list;

	_fd_table_size = FD_TABLE_INC;

	if (!(_fd_table = malloc(sizeof(int) * _fd_table_size)))
		goto bad_fd_table;

	for (i = 0; i < _fd_table_size; i++)
		_fd_table[i] = -1;

	return cache;

bad_fd_table:
	/* Don't advertise a table size when there is no table. */
	_fd_table_size = 0;
	/* The block storage was already allocated; release it as well. */
	_exit_free_list(cache);
bad_free_list:
	radix_tree_destroy(cache->rtree);
bad_rtree:
	cache->engine->destroy(cache->engine);
	free(cache);
	return NULL;
}
void bcache_destroy ( struct bcache * cache )
{
if ( cache - > nr_locked )
2018-01-30 15:13:48 +03:00
log_warn ( " some blocks are still locked " ) ;
2018-01-30 13:46:08 +03:00
2018-11-03 00:19:26 +03:00
if ( ! bcache_flush ( cache ) )
stack ;
2018-01-30 13:46:08 +03:00
_wait_all ( cache ) ;
_exit_free_list ( cache ) ;
2018-05-30 16:17:26 +03:00
radix_tree_destroy ( cache - > rtree ) ;
2018-02-05 19:04:23 +03:00
cache - > engine - > destroy ( cache - > engine ) ;
2018-06-08 15:40:53 +03:00
free ( cache ) ;
bcache: use indirection table for fd
Add a "device index" (di) for each device, and use this
in the bcache api to the rest of lvm. This replaces the
file descriptor (fd) in the api. The rest of lvm uses
new functions bcache_set_fd(), bcache_clear_fd(), and
bcache_change_fd() to control which fd bcache uses for
io to a particular device.
. lvm opens a dev and gets and fd.
fd = open(dev);
. lvm passes fd to the bcache layer and gets a di
to use in the bcache api for the dev.
di = bcache_set_fd(fd);
. lvm uses bcache functions, passing di for the dev.
bcache_write_bytes(di, ...), etc.
. bcache translates di to fd to do io.
. lvm closes the device and clears the di/fd bcache state.
close(fd);
bcache_clear_fd(di);
In the bcache layer, a di-to-fd translation table
(int *_fd_table) is added. When bcache needs to
perform io on a di, it uses _fd_table[di].
In the following commit, lvm will make use of the new
bcache_change_fd() function to change the fd that
bcache uses for the dev, without dropping cached blocks.
2020-09-17 17:40:18 +03:00
free ( _fd_table ) ;
_fd_table = NULL ;
_fd_table_size = 0 ;
2018-01-30 13:46:08 +03:00
}
2018-05-03 11:33:55 +03:00
sector_t bcache_block_sectors ( struct bcache * cache )
{
return cache - > block_sectors ;
}
2018-02-01 17:52:43 +03:00
unsigned bcache_nr_cache_blocks ( struct bcache * cache )
{
return cache - > nr_cache_blocks ;
}
2018-02-02 15:06:14 +03:00
unsigned bcache_max_prefetches ( struct bcache * cache )
{
return cache - > max_io ;
}
bcache: use indirection table for fd
Add a "device index" (di) for each device, and use this
in the bcache api to the rest of lvm. This replaces the
file descriptor (fd) in the api. The rest of lvm uses
new functions bcache_set_fd(), bcache_clear_fd(), and
bcache_change_fd() to control which fd bcache uses for
io to a particular device.
. lvm opens a dev and gets an fd.
fd = open(dev);
. lvm passes fd to the bcache layer and gets a di
to use in the bcache api for the dev.
di = bcache_set_fd(fd);
. lvm uses bcache functions, passing di for the dev.
bcache_write_bytes(di, ...), etc.
. bcache translates di to fd to do io.
. lvm closes the device and clears the di/fd bcache state.
close(fd);
bcache_clear_fd(di);
In the bcache layer, a di-to-fd translation table
(int *_fd_table) is added. When bcache needs to
perform io on a di, it uses _fd_table[di].
In the following commit, lvm will make use of the new
bcache_change_fd() function to change the fd that
bcache uses for the dev, without dropping cached blocks.
2020-09-17 17:40:18 +03:00
/*
 * Best-effort read-ahead of block 'i' on device index 'di'.
 *
 * Nothing is reported to the caller.  The request is silently dropped when:
 *   - 'di' is outside the fd table (same bound bcache_get() enforces, and
 *     negative values are rejected too, since io on a di indexes
 *     _fd_table[di]),
 *   - the block is already present in the cache,
 *   - the number of pending ios has reached cache->max_io,
 *   - no new block could be obtained.
 *
 * Otherwise a read is issued for the new block and the prefetch counter
 * is bumped.
 */
void bcache_prefetch(struct bcache *cache, int di, block_address i)
{
	struct block *b;

	// Never create a block for a device index the fd table cannot resolve.
	if (di < 0 || di >= _fd_table_size)
		return;

	// Already cached: nothing to do.
	if (_block_lookup(cache, di, i))
		return;

	// Don't queue more io than the engine limit allows.
	if (cache->nr_io_pending >= cache->max_io)
		return;

	b = _new_block(cache, di, i, false);
	if (b) {
		cache->prefetches++;
		_issue_read(b);
	}
}
2018-05-30 16:17:26 +03:00
//----------------------------------------------------------------
2018-04-27 12:56:13 +03:00
/*
 * Forgets a block entirely so it can be reused: it is unlinked, removed
 * via _block_remove() (presumably the radix tree entry -- confirm against
 * that helper) and handed back with _free_block().
 *
 * 'cache' is not used by the body.
 */
static void _recycle_block(struct bcache *cache, struct block *b)
{
	_unlink_block(b);

	_block_remove(b);

	_free_block(b);
}
bcache: use indirection table for fd
Add a "device index" (di) for each device, and use this
in the bcache api to the rest of lvm. This replaces the
file descriptor (fd) in the api. The rest of lvm uses
new functions bcache_set_fd(), bcache_clear_fd(), and
bcache_change_fd() to control which fd bcache uses for
io to a particular device.
. lvm opens a dev and gets an fd.
fd = open(dev);
. lvm passes fd to the bcache layer and gets a di
to use in the bcache api for the dev.
di = bcache_set_fd(fd);
. lvm uses bcache functions, passing di for the dev.
bcache_write_bytes(di, ...), etc.
. bcache translates di to fd to do io.
. lvm closes the device and clears the di/fd bcache state.
close(fd);
bcache_clear_fd(di);
In the bcache layer, a di-to-fd translation table
(int *_fd_table) is added. When bcache needs to
perform io on a di, it uses _fd_table[di].
In the following commit, lvm will make use of the new
bcache_change_fd() function to change the fd that
bcache uses for the dev, without dropping cached blocks.
2020-09-17 17:40:18 +03:00
/*
 * Looks up (reading it in if necessary) block 'i' of device index 'di'
 * and takes a reference on it.
 *
 * cache  - the cache to operate on.
 * di     - device index; must lie within [0, _fd_table_size).
 * i      - block address on that device.
 * flags  - passed through unchanged to _lookup_or_read_block().
 * result - out parameter; receives the held block on success and is
 *          always set to NULL on failure.
 *
 * Returns true when a block was obtained.  The first reference taken on a
 * block also increments cache->nr_locked.  Returns false when 'di' is out
 * of range, when no block could be obtained, or when the block carries an
 * io error.
 */
bool bcache_get(struct bcache *cache, int di, block_address i,
		unsigned flags, struct block **result)
{
	struct block *b;

	// A negative di would index before the start of the fd table.
	if (di < 0 || di >= _fd_table_size)
		goto bad;

	b = _lookup_or_read_block(cache, di, i, flags);
	if (!b)
		goto bad;

	if (b->error) {
		if (b->io_dir == DIR_READ) {
			// Now we know the read failed we can just forget
			// about this block, since there's no dirty data to
			// be written back.
			_recycle_block(cache, b);
		}

		// Never leave the caller's pointer unset on a failure path.
		*result = NULL;
		return false;
	}

	if (!b->ref_count)
		cache->nr_locked++;
	b->ref_count++;

	*result = b;
	return true;

bad:
	*result = NULL;

	// Block addresses are 64 bit wide; print the full value.
	log_error(" bcache failed to get block %llu di %d ", (unsigned long long) i, di);
	return false;
}
2018-05-30 16:17:26 +03:00
//----------------------------------------------------------------
2018-02-20 00:40:44 +03:00
static void _put_ref ( struct block * b )
2018-01-30 13:46:08 +03:00
{
if ( ! b - > ref_count ) {
2018-01-30 15:13:48 +03:00
log_warn ( " ref count on bcache block already zero " ) ;
2018-01-30 13:46:08 +03:00
return ;
}
b - > ref_count - - ;
if ( ! b - > ref_count )
b - > cache - > nr_locked - - ;
2018-02-20 00:40:44 +03:00
}
void bcache_put ( struct block * b )
{
_put_ref ( b ) ;
2018-01-30 13:46:08 +03:00
if ( _test_flags ( b , BF_DIRTY ) )
_preemptive_writeback ( b - > cache ) ;
}
2018-05-30 16:17:26 +03:00
//----------------------------------------------------------------
2018-02-20 18:33:27 +03:00
bool bcache_flush ( struct bcache * cache )
2018-01-30 13:46:08 +03:00
{
2018-02-20 18:33:27 +03:00
// Only dirty data is on the errored list, since bad read blocks get
// recycled straight away. So we put these back on the dirty list, and
// try and rewrite everything.
dm_list_splice ( & cache - > dirty , & cache - > errored ) ;
2018-01-30 13:46:08 +03:00
while ( ! dm_list_empty ( & cache - > dirty ) ) {
struct block * b = dm_list_item ( _list_pop ( & cache - > dirty ) , struct block ) ;
2018-02-05 19:04:23 +03:00
if ( b - > ref_count | | _test_flags ( b , BF_IO_PENDING ) ) {
2018-01-30 13:46:08 +03:00
// The superblock may well be still locked.
continue ;
2018-02-05 19:04:23 +03:00
}
2018-04-27 12:56:13 +03:00
2018-01-30 13:46:08 +03:00
_issue_write ( b ) ;
}
_wait_all ( cache ) ;
2018-02-20 18:33:27 +03:00
return dm_list_empty ( & cache - > errored ) ;
2018-01-30 13:46:08 +03:00
}
2018-05-30 16:17:26 +03:00
//----------------------------------------------------------------
2018-02-02 10:59:49 +03:00
/*
* You can safely call this with a NULL block .
*/
2018-04-27 12:56:13 +03:00
static bool _invalidate_block ( struct bcache * cache , struct block * b )
2018-02-02 10:59:49 +03:00
{
if ( ! b )
2018-04-27 12:56:13 +03:00
return true ;
2018-02-02 10:59:49 +03:00
if ( _test_flags ( b , BF_IO_PENDING ) )
_wait_specific ( b ) ;
2018-04-27 12:56:13 +03:00
if ( b - > ref_count ) {
2018-02-02 10:59:49 +03:00
log_warn ( " bcache_invalidate: block (%d, %llu) still held " ,
bcache: use indirection table for fd
Add a "device index" (di) for each device, and use this
in the bcache api to the rest of lvm. This replaces the
file descriptor (fd) in the api. The rest of lvm uses
new functions bcache_set_fd(), bcache_clear_fd(), and
bcache_change_fd() to control which fd bcache uses for
io to a particular device.
. lvm opens a dev and gets and fd.
fd = open(dev);
. lvm passes fd to the bcache layer and gets a di
to use in the bcache api for the dev.
di = bcache_set_fd(fd);
. lvm uses bcache functions, passing di for the dev.
bcache_write_bytes(di, ...), etc.
. bcache translates di to fd to do io.
. lvm closes the device and clears the di/fd bcache state.
close(fd);
bcache_clear_fd(di);
In the bcache layer, a di-to-fd translation table
(int *_fd_table) is added. When bcache needs to
perform io on a di, it uses _fd_table[di].
In the following commit, lvm will make use of the new
bcache_change_fd() function to change the fd that
bcache uses for the dev, without dropping cached blocks.
2020-09-17 17:40:18 +03:00
b - > di , ( unsigned long long ) b - > index ) ;
2018-04-27 12:56:13 +03:00
return false ;
}
2018-02-02 10:59:49 +03:00
2018-04-27 12:56:13 +03:00
if ( _test_flags ( b , BF_DIRTY ) ) {
_issue_write ( b ) ;
_wait_specific ( b ) ;
if ( b - > error )
return false ;
2018-02-02 10:59:49 +03:00
}
2018-04-27 12:56:13 +03:00
_recycle_block ( cache , b ) ;
return true ;
2018-02-02 10:59:49 +03:00
}
bcache: use indirection table for fd
Add a "device index" (di) for each device, and use this
in the bcache api to the rest of lvm. This replaces the
file descriptor (fd) in the api. The rest of lvm uses
new functions bcache_set_fd(), bcache_clear_fd(), and
bcache_change_fd() to control which fd bcache uses for
io to a particular device.
. lvm opens a dev and gets an fd.
fd = open(dev);
. lvm passes fd to the bcache layer and gets a di
to use in the bcache api for the dev.
di = bcache_set_fd(fd);
. lvm uses bcache functions, passing di for the dev.
bcache_write_bytes(di, ...), etc.
. bcache translates di to fd to do io.
. lvm closes the device and clears the di/fd bcache state.
close(fd);
bcache_clear_fd(di);
In the bcache layer, a di-to-fd translation table
(int *_fd_table) is added. When bcache needs to
perform io on a di, it uses _fd_table[di].
In the following commit, lvm will make use of the new
bcache_change_fd() function to change the fd that
bcache uses for the dev, without dropping cached blocks.
2020-09-17 17:40:18 +03:00
/*
 * Invalidates block 'i' of device index 'di'.
 *
 * A block that is not in the cache counts as success, because the lookup
 * result is handed straight to _invalidate_block(), which accepts NULL.
 * Returns that helper's result.
 */
bool bcache_invalidate(struct bcache *cache, int di, block_address i)
{
	struct block *b = _block_lookup(cache, di, i);

	return _invalidate_block(cache, b);
}
//----------------------------------------------------------------
/*
 * Iterator state for bcache_invalidate_di().
 *
 * The visitor only receives a pointer to the embedded 'it' member and
 * recovers this enclosing structure with container_of().
 */
struct invalidate_iterator {
	bool success;			// cleared by _invalidate_v when a block cannot be dropped
	struct radix_tree_iterator it;	// handed to radix_tree_iterate()
};
/*
 * Radix tree visitor: issues a write for the visited block if it is
 * flagged dirty.
 *
 * 'it', 'kb' and 'ke' are unused.  Always returns true.
 */
static bool _writeback_v(struct radix_tree_iterator *it,
			 uint8_t *kb, uint8_t *ke, union radix_value v)
{
	struct block *b = v.ptr;

	if (_test_flags(b, BF_DIRTY)) {
		_issue_write(b);
	}

	return true;
}
static bool _invalidate_v ( struct radix_tree_iterator * it ,
uint8_t * kb , uint8_t * ke , union radix_value v )
{
struct block * b = v . ptr ;
2021-02-07 16:06:12 +03:00
struct invalidate_iterator * iit = container_of ( it , struct invalidate_iterator , it ) ;
2018-05-30 16:17:26 +03:00
if ( b - > error | | _test_flags ( b , BF_DIRTY ) ) {
2021-02-07 16:06:12 +03:00
log_warn ( " WARNING: bcache_invalidate: block (%d, %llu) still dirty. " ,
b - > di , ( unsigned long long ) b - > index ) ;
iit - > success = false ;
return true ;
2018-05-30 16:17:26 +03:00
}
if ( b - > ref_count ) {
2021-02-07 16:06:12 +03:00
log_warn ( " WARNING: bcache_invalidate: block (%d, %llu) still held. " ,
bcache: use indirection table for fd
Add a "device index" (di) for each device, and use this
in the bcache api to the rest of lvm. This replaces the
file descriptor (fd) in the api. The rest of lvm uses
new functions bcache_set_fd(), bcache_clear_fd(), and
bcache_change_fd() to control which fd bcache uses for
io to a particular device.
. lvm opens a dev and gets and fd.
fd = open(dev);
. lvm passes fd to the bcache layer and gets a di
to use in the bcache api for the dev.
di = bcache_set_fd(fd);
. lvm uses bcache functions, passing di for the dev.
bcache_write_bytes(di, ...), etc.
. bcache translates di to fd to do io.
. lvm closes the device and clears the di/fd bcache state.
close(fd);
bcache_clear_fd(di);
In the bcache layer, a di-to-fd translation table
(int *_fd_table) is added. When bcache needs to
perform io on a di, it uses _fd_table[di].
In the following commit, lvm will make use of the new
bcache_change_fd() function to change the fd that
bcache uses for the dev, without dropping cached blocks.
2020-09-17 17:40:18 +03:00
b - > di , ( unsigned long long ) b - > index ) ;
2018-05-30 16:17:26 +03:00
iit - > success = false ;
return true ;
}
_unlink_block ( b ) ;
_free_block ( b ) ;
// We can't remove the block from the radix tree yet because
// we're in the middle of an iteration.
return true ;
2018-02-02 10:59:49 +03:00
}
bcache: use indirection table for fd
Add a "device index" (di) for each device, and use this
in the bcache api to the rest of lvm. This replaces the
file descriptor (fd) in the api. The rest of lvm uses
new functions bcache_set_fd(), bcache_clear_fd(), and
bcache_change_fd() to control which fd bcache uses for
io to a particular device.
. lvm opens a dev and gets an fd.
fd = open(dev);
. lvm passes fd to the bcache layer and gets a di
to use in the bcache api for the dev.
di = bcache_set_fd(fd);
. lvm uses bcache functions, passing di for the dev.
bcache_write_bytes(di, ...), etc.
. bcache translates di to fd to do io.
. lvm closes the device and clears the di/fd bcache state.
close(fd);
bcache_clear_fd(di);
In the bcache layer, a di-to-fd translation table
(int *_fd_table) is added. When bcache needs to
perform io on a di, it uses _fd_table[di].
In the following commit, lvm will make use of the new
bcache_change_fd() function to change the fd that
bcache uses for the dev, without dropping cached blocks.
2020-09-17 17:40:18 +03:00
bool bcache_invalidate_di ( struct bcache * cache , int di )
2018-02-02 10:59:49 +03:00
{
2021-02-07 16:06:12 +03:00
union key k ;
2018-05-30 16:17:26 +03:00
struct invalidate_iterator it ;
2018-02-02 10:59:49 +03:00
bcache: use indirection table for fd
Add a "device index" (di) for each device, and use this
in the bcache api to the rest of lvm. This replaces the
file descriptor (fd) in the api. The rest of lvm uses
new functions bcache_set_fd(), bcache_clear_fd(), and
bcache_change_fd() to control which fd bcache uses for
io to a particular device.
. lvm opens a dev and gets and fd.
fd = open(dev);
. lvm passes fd to the bcache layer and gets a di
to use in the bcache api for the dev.
di = bcache_set_fd(fd);
. lvm uses bcache functions, passing di for the dev.
bcache_write_bytes(di, ...), etc.
. bcache translates di to fd to do io.
. lvm closes the device and clears the di/fd bcache state.
close(fd);
bcache_clear_fd(di);
In the bcache layer, a di-to-fd translation table
(int *_fd_table) is added. When bcache needs to
perform io on a di, it uses _fd_table[di].
In the following commit, lvm will make use of the new
bcache_change_fd() function to change the fd that
bcache uses for the dev, without dropping cached blocks.
2020-09-17 17:40:18 +03:00
k . parts . di = di ;
2018-02-02 10:59:49 +03:00
2018-05-30 16:17:26 +03:00
it . it . visit = _writeback_v ;
bcache: use indirection table for fd
Add a "device index" (di) for each device, and use this
in the bcache api to the rest of lvm. This replaces the
file descriptor (fd) in the api. The rest of lvm uses
new functions bcache_set_fd(), bcache_clear_fd(), and
bcache_change_fd() to control which fd bcache uses for
io to a particular device.
. lvm opens a dev and gets and fd.
fd = open(dev);
. lvm passes fd to the bcache layer and gets a di
to use in the bcache api for the dev.
di = bcache_set_fd(fd);
. lvm uses bcache functions, passing di for the dev.
bcache_write_bytes(di, ...), etc.
. bcache translates di to fd to do io.
. lvm closes the device and clears the di/fd bcache state.
close(fd);
bcache_clear_fd(di);
In the bcache layer, a di-to-fd translation table
(int *_fd_table) is added. When bcache needs to
perform io on a di, it uses _fd_table[di].
In the following commit, lvm will make use of the new
bcache_change_fd() function to change the fd that
bcache uses for the dev, without dropping cached blocks.
2020-09-17 17:40:18 +03:00
radix_tree_iterate ( cache - > rtree , k . bytes , k . bytes + sizeof ( k . parts . di ) , & it . it ) ;
2018-02-02 10:59:49 +03:00
2018-05-30 16:17:26 +03:00
_wait_all ( cache ) ;
2018-02-02 10:59:49 +03:00
2018-05-30 16:17:26 +03:00
it . success = true ;
it . it . visit = _invalidate_v ;
bcache: use indirection table for fd
Add a "device index" (di) for each device, and use this
in the bcache api to the rest of lvm. This replaces the
file descriptor (fd) in the api. The rest of lvm uses
new functions bcache_set_fd(), bcache_clear_fd(), and
bcache_change_fd() to control which fd bcache uses for
io to a particular device.
. lvm opens a dev and gets and fd.
fd = open(dev);
. lvm passes fd to the bcache layer and gets a di
to use in the bcache api for the dev.
di = bcache_set_fd(fd);
. lvm uses bcache functions, passing di for the dev.
bcache_write_bytes(di, ...), etc.
. bcache translates di to fd to do io.
. lvm closes the device and clears the di/fd bcache state.
close(fd);
bcache_clear_fd(di);
In the bcache layer, a di-to-fd translation table
(int *_fd_table) is added. When bcache needs to
perform io on a di, it uses _fd_table[di].
In the following commit, lvm will make use of the new
bcache_change_fd() function to change the fd that
bcache uses for the dev, without dropping cached blocks.
2020-09-17 17:40:18 +03:00
radix_tree_iterate ( cache - > rtree , k . bytes , k . bytes + sizeof ( k . parts . di ) , & it . it ) ;
2019-10-29 18:21:11 +03:00
if ( it . success )
2021-03-15 12:49:20 +03:00
( void ) radix_tree_remove_prefix ( cache - > rtree , k . bytes , k . bytes + sizeof ( k . parts . di ) ) ;
2019-10-29 18:21:11 +03:00
2018-05-30 16:17:26 +03:00
return it . success ;
2018-02-02 10:59:49 +03:00
}
2018-01-30 13:46:08 +03:00
//----------------------------------------------------------------
2019-10-28 17:29:47 +03:00
static bool _abort_v ( struct radix_tree_iterator * it ,
uint8_t * kb , uint8_t * ke , union radix_value v )
{
struct block * b = v . ptr ;
if ( b - > ref_count ) {
log_fatal ( " bcache_abort: block (%d, %llu) still held " ,
bcache: use indirection table for fd
Add a "device index" (di) for each device, and use this
in the bcache api to the rest of lvm. This replaces the
file descriptor (fd) in the api. The rest of lvm uses
new functions bcache_set_fd(), bcache_clear_fd(), and
bcache_change_fd() to control which fd bcache uses for
io to a particular device.
. lvm opens a dev and gets and fd.
fd = open(dev);
. lvm passes fd to the bcache layer and gets a di
to use in the bcache api for the dev.
di = bcache_set_fd(fd);
. lvm uses bcache functions, passing di for the dev.
bcache_write_bytes(di, ...), etc.
. bcache translates di to fd to do io.
. lvm closes the device and clears the di/fd bcache state.
close(fd);
bcache_clear_fd(di);
In the bcache layer, a di-to-fd translation table
(int *_fd_table) is added. When bcache needs to
perform io on a di, it uses _fd_table[di].
In the following commit, lvm will make use of the new
bcache_change_fd() function to change the fd that
bcache uses for the dev, without dropping cached blocks.
2020-09-17 17:40:18 +03:00
b - > di , ( unsigned long long ) b - > index ) ;
2019-10-28 17:29:47 +03:00
return true ;
}
_unlink_block ( b ) ;
_free_block ( b ) ;
// We can't remove the block from the radix tree yet because
// we're in the middle of an iteration.
return true ;
}
bcache: use indirection table for fd
Add a "device index" (di) for each device, and use this
in the bcache api to the rest of lvm. This replaces the
file descriptor (fd) in the api. The rest of lvm uses
new functions bcache_set_fd(), bcache_clear_fd(), and
bcache_change_fd() to control which fd bcache uses for
io to a particular device.
. lvm opens a dev and gets an fd.
fd = open(dev);
. lvm passes fd to the bcache layer and gets a di
to use in the bcache api for the dev.
di = bcache_set_fd(fd);
. lvm uses bcache functions, passing di for the dev.
bcache_write_bytes(di, ...), etc.
. bcache translates di to fd to do io.
. lvm closes the device and clears the di/fd bcache state.
close(fd);
bcache_clear_fd(di);
In the bcache layer, a di-to-fd translation table
(int *_fd_table) is added. When bcache needs to
perform io on a di, it uses _fd_table[di].
In the following commit, lvm will make use of the new
bcache_change_fd() function to change the fd that
bcache uses for the dev, without dropping cached blocks.
2020-09-17 17:40:18 +03:00
void bcache_abort_di ( struct bcache * cache , int di )
2019-10-28 17:29:47 +03:00
{
2021-02-07 16:06:12 +03:00
union key k ;
2019-10-28 17:29:47 +03:00
struct radix_tree_iterator it ;
bcache: use indirection table for fd
Add a "device index" (di) for each device, and use this
in the bcache api to the rest of lvm. This replaces the
file descriptor (fd) in the api. The rest of lvm uses
new functions bcache_set_fd(), bcache_clear_fd(), and
bcache_change_fd() to control which fd bcache uses for
io to a particular device.
. lvm opens a dev and gets and fd.
fd = open(dev);
. lvm passes fd to the bcache layer and gets a di
to use in the bcache api for the dev.
di = bcache_set_fd(fd);
. lvm uses bcache functions, passing di for the dev.
bcache_write_bytes(di, ...), etc.
. bcache translates di to fd to do io.
. lvm closes the device and clears the di/fd bcache state.
close(fd);
bcache_clear_fd(di);
In the bcache layer, a di-to-fd translation table
(int *_fd_table) is added. When bcache needs to
perform io on a di, it uses _fd_table[di].
In the following commit, lvm will make use of the new
bcache_change_fd() function to change the fd that
bcache uses for the dev, without dropping cached blocks.
2020-09-17 17:40:18 +03:00
k . parts . di = di ;
2019-10-28 17:29:47 +03:00
it . visit = _abort_v ;
bcache: use indirection table for fd
Add a "device index" (di) for each device, and use this
in the bcache api to the rest of lvm. This replaces the
file descriptor (fd) in the api. The rest of lvm uses
new functions bcache_set_fd(), bcache_clear_fd(), and
bcache_change_fd() to control which fd bcache uses for
io to a particular device.
. lvm opens a dev and gets and fd.
fd = open(dev);
. lvm passes fd to the bcache layer and gets a di
to use in the bcache api for the dev.
di = bcache_set_fd(fd);
. lvm uses bcache functions, passing di for the dev.
bcache_write_bytes(di, ...), etc.
. bcache translates di to fd to do io.
. lvm closes the device and clears the di/fd bcache state.
close(fd);
bcache_clear_fd(di);
In the bcache layer, a di-to-fd translation table
(int *_fd_table) is added. When bcache needs to
perform io on a di, it uses _fd_table[di].
In the following commit, lvm will make use of the new
bcache_change_fd() function to change the fd that
bcache uses for the dev, without dropping cached blocks.
2020-09-17 17:40:18 +03:00
radix_tree_iterate ( cache - > rtree , k . bytes , k . bytes + sizeof ( k . parts . di ) , & it ) ;
2021-03-15 12:49:20 +03:00
( void ) radix_tree_remove_prefix ( cache - > rtree , k . bytes , k . bytes + sizeof ( k . parts . di ) ) ;
2019-10-28 17:29:47 +03:00
}
//----------------------------------------------------------------
bcache: use indirection table for fd
Add a "device index" (di) for each device, and use this
in the bcache api to the rest of lvm. This replaces the
file descriptor (fd) in the api. The rest of lvm uses
new functions bcache_set_fd(), bcache_clear_fd(), and
bcache_change_fd() to control which fd bcache uses for
io to a particular device.
. lvm opens a dev and gets an fd.
fd = open(dev);
. lvm passes fd to the bcache layer and gets a di
to use in the bcache api for the dev.
di = bcache_set_fd(fd);
. lvm uses bcache functions, passing di for the dev.
bcache_write_bytes(di, ...), etc.
. bcache translates di to fd to do io.
. lvm closes the device and clears the di/fd bcache state.
close(fd);
bcache_clear_fd(di);
In the bcache layer, a di-to-fd translation table
(int *_fd_table) is added. When bcache needs to
perform io on a di, it uses _fd_table[di].
In the following commit, lvm will make use of the new
bcache_change_fd() function to change the fd that
bcache uses for the dev, without dropping cached blocks.
2020-09-17 17:40:18 +03:00
void bcache_set_last_byte ( struct bcache * cache , int di , uint64_t offset , int sector_size )
2018-10-30 00:53:17 +03:00
{
bcache: use indirection table for fd
Add a "device index" (di) for each device, and use this
in the bcache api to the rest of lvm. This replaces the
file descriptor (fd) in the api. The rest of lvm uses
new functions bcache_set_fd(), bcache_clear_fd(), and
bcache_change_fd() to control which fd bcache uses for
io to a particular device.
. lvm opens a dev and gets and fd.
fd = open(dev);
. lvm passes fd to the bcache layer and gets a di
to use in the bcache api for the dev.
di = bcache_set_fd(fd);
. lvm uses bcache functions, passing di for the dev.
bcache_write_bytes(di, ...), etc.
. bcache translates di to fd to do io.
. lvm closes the device and clears the di/fd bcache state.
close(fd);
bcache_clear_fd(di);
In the bcache layer, a di-to-fd translation table
(int *_fd_table) is added. When bcache needs to
perform io on a di, it uses _fd_table[di].
In the following commit, lvm will make use of the new
bcache_change_fd() function to change the fd that
bcache uses for the dev, without dropping cached blocks.
2020-09-17 17:40:18 +03:00
_last_byte_di = di ;
2018-10-30 00:53:17 +03:00
_last_byte_offset = offset ;
_last_byte_sector_size = sector_size ;
if ( ! sector_size )
_last_byte_sector_size = 512 ;
}
bcache: use indirection table for fd
Add a "device index" (di) for each device, and use this
in the bcache api to the rest of lvm. This replaces the
file descriptor (fd) in the api. The rest of lvm uses
new functions bcache_set_fd(), bcache_clear_fd(), and
bcache_change_fd() to control which fd bcache uses for
io to a particular device.
. lvm opens a dev and gets an fd.
fd = open(dev);
. lvm passes fd to the bcache layer and gets a di
to use in the bcache api for the dev.
di = bcache_set_fd(fd);
. lvm uses bcache functions, passing di for the dev.
bcache_write_bytes(di, ...), etc.
. bcache translates di to fd to do io.
. lvm closes the device and clears the di/fd bcache state.
close(fd);
bcache_clear_fd(di);
In the bcache layer, a di-to-fd translation table
(int *_fd_table) is added. When bcache needs to
perform io on a di, it uses _fd_table[di].
In the following commit, lvm will make use of the new
bcache_change_fd() function to change the fd that
bcache uses for the dev, without dropping cached blocks.
2020-09-17 17:40:18 +03:00
void bcache_unset_last_byte ( struct bcache * cache , int di )
2018-10-30 00:53:17 +03:00
{
bcache: use indirection table for fd
Add a "device index" (di) for each device, and use this
in the bcache api to the rest of lvm. This replaces the
file descriptor (fd) in the api. The rest of lvm uses
new functions bcache_set_fd(), bcache_clear_fd(), and
bcache_change_fd() to control which fd bcache uses for
io to a particular device.
. lvm opens a dev and gets and fd.
fd = open(dev);
. lvm passes fd to the bcache layer and gets a di
to use in the bcache api for the dev.
di = bcache_set_fd(fd);
. lvm uses bcache functions, passing di for the dev.
bcache_write_bytes(di, ...), etc.
. bcache translates di to fd to do io.
. lvm closes the device and clears the di/fd bcache state.
close(fd);
bcache_clear_fd(di);
In the bcache layer, a di-to-fd translation table
(int *_fd_table) is added. When bcache needs to
perform io on a di, it uses _fd_table[di].
In the following commit, lvm will make use of the new
bcache_change_fd() function to change the fd that
bcache uses for the dev, without dropping cached blocks.
2020-09-17 17:40:18 +03:00
if ( _last_byte_di = = di ) {
_last_byte_di = 0 ;
2018-10-30 00:53:17 +03:00
_last_byte_offset = 0 ;
_last_byte_sector_size = 0 ;
}
}
bcache: use indirection table for fd
Add a "device index" (di) for each device, and use this
in the bcache api to the rest of lvm. This replaces the
file descriptor (fd) in the api. The rest of lvm uses
new functions bcache_set_fd(), bcache_clear_fd(), and
bcache_change_fd() to control which fd bcache uses for
io to a particular device.
. lvm opens a dev and gets an fd.
fd = open(dev);
. lvm passes fd to the bcache layer and gets a di
to use in the bcache api for the dev.
di = bcache_set_fd(fd);
. lvm uses bcache functions, passing di for the dev.
bcache_write_bytes(di, ...), etc.
. bcache translates di to fd to do io.
. lvm closes the device and clears the di/fd bcache state.
close(fd);
bcache_clear_fd(di);
In the bcache layer, a di-to-fd translation table
(int *_fd_table) is added. When bcache needs to
perform io on a di, it uses _fd_table[di].
In the following commit, lvm will make use of the new
bcache_change_fd() function to change the fd that
bcache uses for the dev, without dropping cached blocks.
2020-09-17 17:40:18 +03:00
int bcache_set_fd ( int fd )
{
int * new_table = NULL ;
int new_size = 0 ;
int i ;
retry :
for ( i = 0 ; i < _fd_table_size ; i + + ) {
if ( _fd_table [ i ] = = - 1 ) {
_fd_table [ i ] = fd ;
return i ;
}
}
/* already tried once, shouldn't happen */
if ( new_size )
return - 1 ;
new_size = _fd_table_size + FD_TABLE_INC ;
new_table = realloc ( _fd_table , sizeof ( int ) * new_size ) ;
if ( ! new_table ) {
log_error ( " Cannot extend bcache fd table " ) ;
return - 1 ;
}
for ( i = _fd_table_size ; i < new_size ; i + + )
new_table [ i ] = - 1 ;
_fd_table = new_table ;
_fd_table_size = new_size ;
goto retry ;
}
/*
 * Should we check for unflushed or in-progress io on an fd
 * prior to doing clear_fd or change_fd?  (To catch mistakes;
 * the caller should be smart enough not to do that.)
 */
/*
 * Mark the table slot for device index di as unused by storing -1
 * in _fd_table[di].
 *
 * di: device index to clear.  Values outside the table (negative,
 *     or at/after _fd_table_size) are ignored without any message.
 */
void bcache_clear_fd(int di)
{
	/*
	 * Check both bounds: a negative di (for example the -1 that
	 * bcache_set_fd() returns on failure) would otherwise write
	 * before the start of _fd_table.
	 */
	if (di < 0 || di >= _fd_table_size)
		return;

	_fd_table[di] = -1;
}
int bcache_change_fd ( int di , int fd )
{
if ( di > = _fd_table_size )
2020-09-20 00:00:50 +03:00
return 0 ;
if ( di < 0 ) {
2021-02-07 16:06:12 +03:00
log_error ( INTERNAL_ERROR " Cannot change not opened DI with FD:%d " , fd ) ;
2020-09-20 00:00:50 +03:00
return 0 ;
}
bcache: use indirection table for fd
Add a "device index" (di) for each device, and use this
in the bcache api to the rest of lvm. This replaces the
file descriptor (fd) in the api. The rest of lvm uses
new functions bcache_set_fd(), bcache_clear_fd(), and
bcache_change_fd() to control which fd bcache uses for
io to a particular device.
. lvm opens a dev and gets and fd.
fd = open(dev);
. lvm passes fd to the bcache layer and gets a di
to use in the bcache api for the dev.
di = bcache_set_fd(fd);
. lvm uses bcache functions, passing di for the dev.
bcache_write_bytes(di, ...), etc.
. bcache translates di to fd to do io.
. lvm closes the device and clears the di/fd bcache state.
close(fd);
bcache_clear_fd(di);
In the bcache layer, a di-to-fd translation table
(int *_fd_table) is added. When bcache needs to
perform io on a di, it uses _fd_table[di].
In the following commit, lvm will make use of the new
bcache_change_fd() function to change the fd that
bcache uses for the dev, without dropping cached blocks.
2020-09-17 17:40:18 +03:00
_fd_table [ di ] = fd ;
return 1 ;
}