/*
 * Copyright (C) 2011 Red Hat, Inc.
 *
 * This file is released under the GPL.
 */

#include "dm-block-manager.h"
#include "dm-persistent-data-internal.h"
#include "../dm-bufio.h"

#include <linux/crc32c.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/rwsem.h>
#include <linux/device-mapper.h>
#include <linux/stacktrace.h>

#define DM_MSG_PREFIX "block manager"
/*----------------------------------------------------------------*/
/*
 * This is a read/write semaphore with a couple of differences.
 *
 * i) There is a restriction on the number of concurrent read locks that
 * may be held at once.  This is just an implementation detail.
 *
 * ii) Recursive locking attempts are detected and return EINVAL.  A stack
 * trace is also emitted for the previous lock acquisition.
 *
 * iii) Priority is given to write locks.
 */
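/*
 * A minimal sketch of the semantics above (illustrative only, not part
 * of this file): a second acquisition by a task that already holds the
 * lock fails with -EINVAL rather than deadlocking.
 *
 *	struct block_lock bl;
 *
 *	bl_init(&bl);
 *	bl_down_read(&bl);	// returns 0; current recorded as a holder
 *	bl_down_read(&bl);	// returns -EINVAL: recursion detected
 *	bl_up_read(&bl);	// drops the one read lock actually held
 */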
#define MAX_HOLDERS 4
#define MAX_STACK 10

typedef unsigned long stack_entries[MAX_STACK];

struct block_lock {
	spinlock_t lock;
	__s32 count;
	struct list_head waiters;
	struct task_struct *holders[MAX_HOLDERS];

#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	struct stack_trace traces[MAX_HOLDERS];
	stack_entries entries[MAX_HOLDERS];
#endif
};

struct waiter {
	struct list_head list;
	struct task_struct *task;
	int wants_write;
};
static unsigned __find_holder(struct block_lock *lock,
			      struct task_struct *task)
{
	unsigned i;

	for (i = 0; i < MAX_HOLDERS; i++)
		if (lock->holders[i] == task)
			break;

	BUG_ON(i == MAX_HOLDERS);
	return i;
}
/* call this *after* you increment lock->count */
static void __add_holder(struct block_lock *lock, struct task_struct *task)
{
	unsigned h = __find_holder(lock, NULL);
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	struct stack_trace *t;
#endif

	get_task_struct(task);
	lock->holders[h] = task;

#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	t = lock->traces + h;
	t->nr_entries = 0;
	t->max_entries = MAX_STACK;
	t->entries = lock->entries[h];
	t->skip = 2;
	save_stack_trace(t);
#endif
}
/* call this *before* you decrement lock->count */
static void __del_holder(struct block_lock *lock, struct task_struct *task)
{
	unsigned h = __find_holder(lock, task);

	lock->holders[h] = NULL;
	put_task_struct(task);
}
static int __check_holder(struct block_lock *lock)
{
	unsigned i;
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
	static struct stack_trace t;
	static stack_entries entries;
#endif

	for (i = 0; i < MAX_HOLDERS; i++) {
		if (lock->holders[i] == current) {
			DMERR("recursive lock detected in metadata");
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
			DMERR("previously held here:");
			print_stack_trace(lock->traces + i, 4);

			DMERR("subsequent acquisition attempted here:");
			t.nr_entries = 0;
			t.max_entries = MAX_STACK;
			t.entries = entries;
			t.skip = 3;
			save_stack_trace(&t);
			print_stack_trace(&t, 4);
#endif
			return -EINVAL;
		}
	}

	return 0;
}
static void __wait(struct waiter *w)
{
	for (;;) {
		set_task_state(current, TASK_UNINTERRUPTIBLE);

		if (!w->task)
			break;

		schedule();
	}

	set_task_state(current, TASK_RUNNING);
}
static void __wake_waiter(struct waiter *w)
{
	struct task_struct *task;

	list_del(&w->list);
	task = w->task;
	smp_mb();	/* list_del must be visible before w->task is cleared;
			 * the waiter's stack frame may vanish once it sees NULL */
	w->task = NULL;
	wake_up_process(task);
}
/*
 * We either wake a few readers or a single writer.
 */
static void __wake_many(struct block_lock *lock)
{
	struct waiter *w, *tmp;

	BUG_ON(lock->count < 0);
	list_for_each_entry_safe(w, tmp, &lock->waiters, list) {
		if (lock->count >= MAX_HOLDERS)
			return;

		if (w->wants_write) {
			if (lock->count > 0)
				return; /* still read locked */

			lock->count = -1;
			__add_holder(lock, w->task);
			__wake_waiter(w);
			return;
		}

		lock->count++;
		__add_holder(lock, w->task);
		__wake_waiter(w);
	}
}
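/*
 * Worked example of the wake-up policy above (illustrative): suppose
 * the lock is still read held with count == 1 and the waiter list is
 * [reader, writer, reader].  The leading reader is woken (count becomes
 * 2); the writer then sees count > 0 and the scan stops, so the
 * trailing reader keeps waiting until the writer has had its turn.
 */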
static void bl_init(struct block_lock *lock)
{
	int i;

	spin_lock_init(&lock->lock);
	lock->count = 0;
	INIT_LIST_HEAD(&lock->waiters);
	for (i = 0; i < MAX_HOLDERS; i++)
		lock->holders[i] = NULL;
}

static int __available_for_read(struct block_lock *lock)
{
	return lock->count >= 0 &&
		lock->count < MAX_HOLDERS &&
		list_empty(&lock->waiters);
}
static int bl_down_read(struct block_lock *lock)
{
	int r;
	struct waiter w;

	spin_lock(&lock->lock);
	r = __check_holder(lock);
	if (r) {
		spin_unlock(&lock->lock);
		return r;
	}

	if (__available_for_read(lock)) {
		lock->count++;
		__add_holder(lock, current);
		spin_unlock(&lock->lock);
		return 0;
	}

	get_task_struct(current);

	w.task = current;
	w.wants_write = 0;
	list_add_tail(&w.list, &lock->waiters);
	spin_unlock(&lock->lock);

	__wait(&w);
	put_task_struct(current);
	return 0;
}
static int bl_down_read_nonblock(struct block_lock *lock)
{
	int r;

	spin_lock(&lock->lock);
	r = __check_holder(lock);
	if (r)
		goto out;

	if (__available_for_read(lock)) {
		lock->count++;
		__add_holder(lock, current);
		r = 0;
	} else
		r = -EWOULDBLOCK;

out:
	spin_unlock(&lock->lock);
	return r;
}
static void bl_up_read(struct block_lock *lock)
{
	spin_lock(&lock->lock);
	BUG_ON(lock->count <= 0);
	__del_holder(lock, current);
	--lock->count;
	if (!list_empty(&lock->waiters))
		__wake_many(lock);
	spin_unlock(&lock->lock);
}
static int bl_down_write(struct block_lock *lock)
{
	int r;
	struct waiter w;

	spin_lock(&lock->lock);
	r = __check_holder(lock);
	if (r) {
		spin_unlock(&lock->lock);
		return r;
	}

	if (lock->count == 0 && list_empty(&lock->waiters)) {
		lock->count = -1;
		__add_holder(lock, current);
		spin_unlock(&lock->lock);
		return 0;
	}

	get_task_struct(current);
	w.task = current;
	w.wants_write = 1;

	/*
	 * Writers are given priority.  We know there's only one mutator
	 * in the system, so we can ignore the ordering reversal.
	 */
	list_add(&w.list, &lock->waiters);
	spin_unlock(&lock->lock);

	__wait(&w);
	put_task_struct(current);

	return 0;
}
static void bl_up_write(struct block_lock *lock)
{
	spin_lock(&lock->lock);
	__del_holder(lock, current);
	lock->count = 0;
	if (!list_empty(&lock->waiters))
		__wake_many(lock);
	spin_unlock(&lock->lock);
}

static void report_recursive_bug(dm_block_t b, int r)
{
	if (r == -EINVAL)
		DMERR("recursive acquisition of block %llu requested.",
		      (unsigned long long) b);
}
/*----------------------------------------------------------------*/
/*
 * Block manager is currently implemented using dm-bufio.  struct
 * dm_block_manager and struct dm_block map directly onto a couple of
 * structs in the bufio interface.  I want to retain the freedom to move
 * away from bufio in the future.  So these structs are just cast within
 * this .c file, rather than making it through to the public interface.
 */
static struct dm_buffer *to_buffer(struct dm_block *b)
{
	return (struct dm_buffer *) b;
}
dm_block_t dm_block_location(struct dm_block *b)
{
	return dm_bufio_get_block_number(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_block_location);

void *dm_block_data(struct dm_block *b)
{
	return dm_bufio_get_block_data(to_buffer(b));
}
EXPORT_SYMBOL_GPL(dm_block_data);
struct buffer_aux {
	struct dm_block_validator *validator;
	struct block_lock lock;
	int write_locked;
};

static void dm_block_manager_alloc_callback(struct dm_buffer *buf)
{
	struct buffer_aux *aux = dm_bufio_get_aux_data(buf);

	aux->validator = NULL;
	bl_init(&aux->lock);
}

static void dm_block_manager_write_callback(struct dm_buffer *buf)
{
	struct buffer_aux *aux = dm_bufio_get_aux_data(buf);

	if (aux->validator) {
		aux->validator->prepare_for_write(aux->validator, (struct dm_block *) buf,
			 dm_bufio_get_block_size(dm_bufio_get_client(buf)));
	}
}
/*----------------------------------------------------------------
 * Public interface
 *--------------------------------------------------------------*/
struct dm_block_manager {
	struct dm_bufio_client *bufio;
	bool read_only:1;
};
struct dm_block_manager *dm_block_manager_create(struct block_device *bdev,
						 unsigned block_size,
						 unsigned cache_size,
						 unsigned max_held_per_thread)
{
	int r;
	struct dm_block_manager *bm;

	bm = kmalloc(sizeof(*bm), GFP_KERNEL);
	if (!bm) {
		r = -ENOMEM;
		goto bad;
	}

	bm->bufio = dm_bufio_client_create(bdev, block_size, max_held_per_thread,
					   sizeof(struct buffer_aux),
					   dm_block_manager_alloc_callback,
					   dm_block_manager_write_callback);
	if (IS_ERR(bm->bufio)) {
		r = PTR_ERR(bm->bufio);
		kfree(bm);
		goto bad;
	}

	bm->read_only = false;

	return bm;

bad:
	return ERR_PTR(r);
}
EXPORT_SYMBOL_GPL(dm_block_manager_create);
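/*
 * A minimal usage sketch (illustrative only; the 4096-byte block size,
 * the cache size and the max_held_per_thread value here are
 * assumptions, not requirements):
 *
 *	struct dm_block_manager *bm;
 *
 *	bm = dm_block_manager_create(bdev, 4096, 0, 1);
 *	if (IS_ERR(bm))
 *		return PTR_ERR(bm);
 *	...
 *	dm_block_manager_destroy(bm);
 */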
void dm_block_manager_destroy(struct dm_block_manager *bm)
{
	dm_bufio_client_destroy(bm->bufio);
	kfree(bm);
}
EXPORT_SYMBOL_GPL(dm_block_manager_destroy);
unsigned dm_bm_block_size(struct dm_block_manager *bm)
{
	return dm_bufio_get_block_size(bm->bufio);
}
EXPORT_SYMBOL_GPL(dm_bm_block_size);

dm_block_t dm_bm_nr_blocks(struct dm_block_manager *bm)
{
	return dm_bufio_get_device_size(bm->bufio);
}
static int dm_bm_validate_buffer(struct dm_block_manager *bm,
				 struct dm_buffer *buf,
				 struct buffer_aux *aux,
				 struct dm_block_validator *v)
{
	if (unlikely(!aux->validator)) {
		int r;
		if (!v)
			return 0;
		r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(bm->bufio));
		if (unlikely(r)) {
			DMERR_LIMIT("%s validator check failed for block %llu", v->name,
				    (unsigned long long) dm_bufio_get_block_number(buf));
			return r;
		}
		aux->validator = v;
	} else {
		if (unlikely(aux->validator != v)) {
			DMERR_LIMIT("validator mismatch (old=%s vs new=%s) for block %llu",
				    aux->validator->name, v ? v->name : "NULL",
				    (unsigned long long) dm_bufio_get_block_number(buf));
			return -EINVAL;
		}
	}

	return 0;
}
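/*
 * A minimal sketch of a dm_block_validator (illustrative only; the
 * example_disk layout and EXAMPLE_CSUM_XOR constant are assumptions,
 * not part of this file).  Validators typically stamp a checksum on
 * write and verify it on first read, e.g. via dm_bm_checksum() below:
 *
 *	struct example_disk {
 *		__le32 csum;	// covers the rest of the block
 *		__le32 flags;
 *	} __packed;
 *
 *	#define EXAMPLE_CSUM_XOR 12345
 *
 *	static void example_prepare_for_write(struct dm_block_validator *v,
 *					      struct dm_block *b,
 *					      size_t block_size)
 *	{
 *		struct example_disk *d = dm_block_data(b);
 *
 *		d->csum = cpu_to_le32(dm_bm_checksum(&d->flags,
 *						     block_size - sizeof(__le32),
 *						     EXAMPLE_CSUM_XOR));
 *	}
 *
 *	static int example_check(struct dm_block_validator *v,
 *				 struct dm_block *b, size_t block_size)
 *	{
 *		struct example_disk *d = dm_block_data(b);
 *		__le32 csum = cpu_to_le32(dm_bm_checksum(&d->flags,
 *							 block_size - sizeof(__le32),
 *							 EXAMPLE_CSUM_XOR));
 *
 *		return csum == d->csum ? 0 : -EILSEQ;
 *	}
 *
 *	static struct dm_block_validator example_validator = {
 *		.name = "example",
 *		.prepare_for_write = example_prepare_for_write,
 *		.check = example_check,
 *	};
 */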
int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b,
		    struct dm_block_validator *v,
		    struct dm_block **result)
{
	struct buffer_aux *aux;
	void *p;
	int r;

	p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result);
	if (unlikely(IS_ERR(p)))
		return PTR_ERR(p);

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_read(&aux->lock);
	if (unlikely(r)) {
		dm_bufio_release(to_buffer(*result));
		report_recursive_bug(b, r);
		return r;
	}

	aux->write_locked = 0;

	r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
	if (unlikely(r)) {
		bl_up_read(&aux->lock);
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_read_lock);
int dm_bm_write_lock(struct dm_block_manager *bm,
		     dm_block_t b, struct dm_block_validator *v,
		     struct dm_block **result)
{
	struct buffer_aux *aux;
	void *p;
	int r;

	if (bm->read_only)
		return -EPERM;

	p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result);
	if (unlikely(IS_ERR(p)))
		return PTR_ERR(p);

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_write(&aux->lock);
	if (r) {
		dm_bufio_release(to_buffer(*result));
		report_recursive_bug(b, r);
		return r;
	}

	aux->write_locked = 1;

	r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
	if (unlikely(r)) {
		bl_up_write(&aux->lock);
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_write_lock);
int dm_bm_read_try_lock(struct dm_block_manager *bm,
			dm_block_t b, struct dm_block_validator *v,
			struct dm_block **result)
{
	struct buffer_aux *aux;
	void *p;
	int r;

	p = dm_bufio_get(bm->bufio, b, (struct dm_buffer **) result);
	if (unlikely(IS_ERR(p)))
		return PTR_ERR(p);
	if (unlikely(!p))
		return -EWOULDBLOCK;

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_read_nonblock(&aux->lock);
	if (r < 0) {
		dm_bufio_release(to_buffer(*result));
		report_recursive_bug(b, r);
		return r;
	}
	aux->write_locked = 0;

	r = dm_bm_validate_buffer(bm, to_buffer(*result), aux, v);
	if (unlikely(r)) {
		bl_up_read(&aux->lock);
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	return 0;
}
int dm_bm_write_lock_zero(struct dm_block_manager *bm,
			  dm_block_t b, struct dm_block_validator *v,
			  struct dm_block **result)
{
	int r;
	struct buffer_aux *aux;
	void *p;

	if (bm->read_only)
		return -EPERM;

	p = dm_bufio_new(bm->bufio, b, (struct dm_buffer **) result);
	if (unlikely(IS_ERR(p)))
		return PTR_ERR(p);

	memset(p, 0, dm_bm_block_size(bm));

	aux = dm_bufio_get_aux_data(to_buffer(*result));
	r = bl_down_write(&aux->lock);
	if (r) {
		dm_bufio_release(to_buffer(*result));
		return r;
	}

	aux->write_locked = 1;
	aux->validator = v;

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_write_lock_zero);
int dm_bm_unlock(struct dm_block *b)
{
	struct buffer_aux *aux;

	aux = dm_bufio_get_aux_data(to_buffer(b));

	if (aux->write_locked) {
		dm_bufio_mark_buffer_dirty(to_buffer(b));
		bl_up_write(&aux->lock);
	} else
		bl_up_read(&aux->lock);

	dm_bufio_release(to_buffer(b));

	return 0;
}
EXPORT_SYMBOL_GPL(dm_bm_unlock);
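/*
 * A minimal write-path sketch (illustrative only; 'bm', the block
 * number 'b' and 'my_validator' are assumed to exist in the caller):
 *
 *	struct dm_block *blk;
 *	int r;
 *
 *	r = dm_bm_write_lock_zero(bm, b, &my_validator, &blk);
 *	if (r)
 *		return r;
 *
 *	// ... modify dm_block_data(blk) ...
 *
 *	dm_bm_unlock(blk);	// marks the buffer dirty
 *	return dm_bm_flush(bm);	// write dirty buffers back to disk
 */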
int dm_bm_flush(struct dm_block_manager *bm)
{
	if (bm->read_only)
		return -EPERM;

	return dm_bufio_write_dirty_buffers(bm->bufio);
}
EXPORT_SYMBOL_GPL(dm_bm_flush);
void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b)
{
	dm_bufio_prefetch(bm->bufio, b, 1);
}
bool dm_bm_is_read_only(struct dm_block_manager *bm)
{
	return bm->read_only;
}
EXPORT_SYMBOL_GPL(dm_bm_is_read_only);
void dm_bm_set_read_only(struct dm_block_manager *bm)
{
	bm->read_only = true;
}
EXPORT_SYMBOL_GPL(dm_bm_set_read_only);
void dm_bm_set_read_write(struct dm_block_manager *bm)
{
	bm->read_only = false;
}
EXPORT_SYMBOL_GPL(dm_bm_set_read_write);
u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor)
{
	return crc32c(~(u32) 0, data, len) ^ init_xor;
}
EXPORT_SYMBOL_GPL(dm_bm_checksum);
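/*
 * Example (illustrative; SUPERBLOCK_CSUM_XOR is a caller-chosen
 * constant, not defined here).  The crc32c runs with an inverted-zero
 * seed and the result is xor'd with init_xor, so different metadata
 * types can pick distinct init_xor values and a block of one type will
 * not checksum correctly as another:
 *
 *	u32 csum = dm_bm_checksum(data, len, SUPERBLOCK_CSUM_XOR);
 */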
/*----------------------------------------------------------------*/
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
MODULE_DESCRIPTION("Immutable metadata library for dm");
/*----------------------------------------------------------------*/