2011-12-21 17:08:11 +04:00
/*
2017-01-20 23:41:23 +03:00
* Copyright ( C ) 2011 - 2017 Red Hat , Inc . All rights reserved .
2011-12-21 17:08:11 +04:00
*
* This file is part of LVM2 .
*
* This copyrighted material is made available to anyone wishing to use ,
* modify , copy , or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License v .2 .1 .
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program ; if not , write to the Free Software Foundation ,
2016-01-21 13:49:46 +03:00
* Inc . , 51 Franklin Street , Fifth Floor , Boston , MA 02110 - 1301 USA
2011-12-21 17:08:11 +04:00
*/
2015-10-10 17:58:31 +03:00
# include "lib.h" /* using here lvm log */
2011-12-21 17:08:11 +04:00
# include "dmeventd_lvm.h"
2015-10-10 17:58:31 +03:00
# include "libdevmapper-event.h"
2011-12-21 17:08:11 +04:00
# include <sys/wait.h>
2014-04-15 15:27:47 +04:00
# include <stdarg.h>
2011-12-21 17:08:11 +04:00
2015-10-12 12:35:33 +03:00
/* TODO - move this mountinfo code into library to be reusable */
# ifdef __linux__
# include "kdev_t.h"
# else
# define MAJOR(x) major((x))
# define MINOR(x) minor((x))
# endif
/* First warning when thin data or metadata is 80% full. */
2015-10-20 15:19:35 +03:00
# define WARNING_THRESH (DM_PERCENT_1 * 80)
2016-06-29 16:56:29 +03:00
/* Umount thin LVs when thin data or metadata LV is >= UMOUNT_THRESH
 * and lvextend --use-policies has failed. */
# define UMOUNT_THRESH (DM_PERCENT_1 * 95)
2011-12-21 17:08:11 +04:00
/* Run a check every 5%. */
2015-10-20 15:19:35 +03:00
# define CHECK_STEP (DM_PERCENT_1 * 5)
2015-10-12 12:35:33 +03:00
/* Do not bother checking when thin data or metadata is less than 50% full. */
2015-10-20 15:19:35 +03:00
# define CHECK_MINIMUM (DM_PERCENT_1 * 50)
2011-12-21 17:08:11 +04:00
# define UMOUNT_COMMAND " / bin / umount"
2017-01-21 00:53:55 +03:00
# define MAX_FAILS (256) /* ~42 mins between cmd call retry with 10s delay */
2015-10-29 13:52:11 +03:00
2011-12-21 17:08:11 +04:00
# define THIN_DEBUG 0
/*
 * Per-device monitoring state.
 * One instance is created in register_device(), handed back to the caller
 * through '*user', and released in unregister_device().
 */
struct dso_state {
	struct dm_pool *mem;		/* Pool used for this state and status parsing */

	int metadata_percent_check;	/* Next metadata threshold that triggers the policy */
	int metadata_percent;		/* Last computed metadata usage (DM_PERCENT units) */
	int data_percent_check;		/* Next data threshold that triggers the policy */
	int data_percent;		/* Last computed data usage (DM_PERCENT units) */

	uint64_t known_metadata_size;	/* Metadata size (blocks) seen on the last event */
	uint64_t known_data_size;	/* Data size (blocks) seen on the last event */

	unsigned fails;			/* Non-zero after a failed command; counts postponed retries */
	unsigned max_fails;		/* Number of events to skip before retrying a failing command */

	int restore_sigset;		/* Set when old_sigset holds a mask to restore */
	sigset_t old_sigset;		/* Thread signal mask saved before unblocking SIGCHLD */

	pid_t pid;			/* Running external command, or -1 when there is none */
	char *argv[3];			/* External command: path, vg/lv argument, NULL */
	char *cmd_str;			/* Full command string (used for logging and lvm commands) */
};
2015-10-10 17:58:31 +03:00
DM_EVENT_LOG_FN ( " thin " )
2017-01-20 23:41:23 +03:00
static int _run_command ( struct dso_state * state )
{
2017-01-23 16:23:24 +03:00
char val [ 3 ] [ 36 ] ;
char * env [ ] = { val [ 0 ] , val [ 1 ] , val [ 2 ] , NULL } ;
2017-01-20 23:41:23 +03:00
int i ;
2017-01-23 16:23:24 +03:00
/* Mark for possible lvm2 command we are running from dmeventd
* lvm2 will not try to talk back to dmeventd while processing it */
( void ) dm_snprintf ( val [ 0 ] , sizeof ( val [ 0 ] ) , " LVM_RUN_BY_DMEVENTD=1 " ) ;
2017-01-20 23:41:23 +03:00
if ( state - > data_percent ) {
/* Prepare some known data to env vars for easy use */
2017-01-23 16:23:24 +03:00
( void ) dm_snprintf ( val [ 1 ] , sizeof ( val [ 1 ] ) , " DMEVENTD_THIN_POOL_DATA=%d " ,
2017-01-20 23:41:23 +03:00
state - > data_percent / DM_PERCENT_1 ) ;
2017-01-23 16:23:24 +03:00
( void ) dm_snprintf ( val [ 2 ] , sizeof ( val [ 2 ] ) , " DMEVENTD_THIN_POOL_METADATA=%d " ,
2017-01-20 23:41:23 +03:00
state - > metadata_percent / DM_PERCENT_1 ) ;
} else {
/* For an error event it's for a user to check status and decide */
2017-01-23 16:23:24 +03:00
env [ 1 ] = NULL ;
2017-02-13 00:07:23 +03:00
log_debug ( " Error event processing. " ) ;
2017-01-20 23:41:23 +03:00
}
log_verbose ( " Executing command: %s " , state - > cmd_str ) ;
/* TODO:
* Support parallel run of ' task ' and it ' s waitpid maintainence
* ATM we can ' t handle signaling of SIGALRM
* as signalling is not allowed while ' process_event ( ) ' is running
*/
if ( ! ( state - > pid = fork ( ) ) ) {
/* child */
( void ) close ( 0 ) ;
for ( i = 3 ; i < 255 ; + + i ) ( void ) close ( i ) ;
execve ( state - > argv [ 0 ] , state - > argv , env ) ;
_exit ( errno ) ;
} else if ( state - > pid = = - 1 ) {
log_error ( " Can't fork command %s. " , state - > cmd_str ) ;
state - > fails = 1 ;
return 0 ;
}
return 1 ;
}
2016-06-29 16:56:29 +03:00
static int _use_policy ( struct dm_task * dmt , struct dso_state * state )
2015-10-20 15:19:35 +03:00
{
# if THIN_DEBUG
2016-11-01 13:02:01 +03:00
log_debug ( " dmeventd executes: %s. " , state - > cmd_str ) ;
2015-10-20 15:19:35 +03:00
# endif
2017-02-13 00:07:23 +03:00
if ( state - > argv [ 0 ] )
2017-01-20 23:41:23 +03:00
return _run_command ( state ) ;
2015-10-20 15:19:35 +03:00
if ( ! dmeventd_lvm2_run_with_lock ( state - > cmd_str ) ) {
2017-01-20 23:41:23 +03:00
log_error ( " Failed command for %s. " , dm_task_get_name ( dmt ) ) ;
state - > fails = 1 ;
2016-06-29 16:56:29 +03:00
return 0 ;
}
state - > fails = 0 ;
2017-01-20 23:41:23 +03:00
2016-06-29 16:56:29 +03:00
return 1 ;
2015-10-20 15:19:35 +03:00
}
2017-01-20 23:42:55 +03:00
/* Check if executed command has finished
* Only 1 command may run */
static int _wait_for_pid ( struct dso_state * state )
{
int status = 0 ;
if ( state - > pid = = - 1 )
return 1 ;
if ( ! waitpid ( state - > pid , & status , WNOHANG ) )
return 0 ;
/* Wait for finish */
if ( WIFEXITED ( status ) ) {
log_verbose ( " Child %d exited with status %d. " ,
state - > pid , WEXITSTATUS ( status ) ) ;
state - > fails = WEXITSTATUS ( status ) ? 1 : 0 ;
} else {
if ( WIFSIGNALED ( status ) )
log_verbose ( " Child %d was terminated with status %d. " ,
state - > pid , WTERMSIG ( status ) ) ;
state - > fails = 1 ;
}
state - > pid = - 1 ;
return 1 ;
}
2011-12-21 17:08:11 +04:00
void process_event ( struct dm_task * dmt ,
enum dm_event_mask event __attribute__ ( ( unused ) ) ,
2015-10-12 12:40:51 +03:00
void * * user )
2011-12-21 17:08:11 +04:00
{
const char * device = dm_task_get_name ( dmt ) ;
2015-10-12 12:40:51 +03:00
struct dso_state * state = * user ;
2012-01-20 14:59:26 +04:00
struct dm_status_thin_pool * tps = NULL ;
2011-12-21 17:08:11 +04:00
void * next = NULL ;
uint64_t start , length ;
char * target_type = NULL ;
char * params ;
2015-10-20 15:19:35 +03:00
int needs_policy = 0 ;
2016-12-23 01:28:04 +03:00
struct dm_task * new_dmt = NULL ;
2016-06-29 16:56:29 +03:00
# if THIN_DEBUG
log_debug ( " Watch for tp-data:%.2f%% tp-metadata:%.2f%%. " ,
2017-06-24 17:39:50 +03:00
dm_percent_to_round_float ( state - > data_percent_check , 2 ) ,
dm_percent_to_round_float ( state - > metadata_percent_check , 2 ) ) ;
2016-06-29 16:56:29 +03:00
# endif
2017-01-20 23:42:55 +03:00
if ( ! _wait_for_pid ( state ) ) {
log_warn ( " WARNING: Skipping event, child %d is still running (%s). " ,
state - > pid , state - > cmd_str ) ;
return ;
}
2011-12-21 17:08:11 +04:00
2015-10-20 15:19:35 +03:00
if ( event & DM_EVENT_DEVICE_ERROR ) {
/* Error -> no need to check and do instant resize */
2017-01-18 16:06:29 +03:00
state - > data_percent = state - > metadata_percent = 0 ;
2016-06-29 16:56:29 +03:00
if ( _use_policy ( dmt , state ) )
goto out ;
stack ;
2016-12-23 01:28:04 +03:00
/*
* Rather update oldish status
* since after ' command ' processing
* percentage info could have changed a lot .
* If we would get above UMOUNT_THRESH
* we would wait for next sigalarm .
*/
if ( ! ( new_dmt = dm_task_create ( DM_DEVICE_STATUS ) ) )
goto_out ;
if ( ! dm_task_set_uuid ( new_dmt , dm_task_get_uuid ( dmt ) ) )
goto_out ;
/* Non-blocking status read */
if ( ! dm_task_no_flush ( new_dmt ) )
log_warn ( " WARNING: Can't set no_flush for dm status. " ) ;
if ( ! dm_task_run ( new_dmt ) )
goto_out ;
dmt = new_dmt ;
2015-10-20 15:19:35 +03:00
}
2011-12-21 17:08:11 +04:00
dm_get_next_target ( dmt , next , & start , & length , & target_type , & params ) ;
if ( ! target_type | | ( strcmp ( target_type , " thin-pool " ) ! = 0 ) ) {
2015-10-09 22:57:48 +03:00
log_error ( " Invalid target type. " ) ;
2011-12-21 17:08:11 +04:00
goto out ;
}
if ( ! dm_get_status_thin_pool ( state - > mem , params , & tps ) ) {
2015-10-09 22:57:48 +03:00
log_error ( " Failed to parse status. " ) ;
2011-12-21 17:08:11 +04:00
goto out ;
}
# if THIN_DEBUG
2015-10-20 15:19:35 +03:00
log_debug ( " Thin pool status " FMTu64 " / " FMTu64 " "
FMTu64 " / " FMTu64 " . " ,
2015-10-09 22:57:48 +03:00
tps - > used_metadata_blocks , tps - > total_metadata_blocks ,
tps - > used_data_blocks , tps - > total_data_blocks ) ;
2011-12-21 17:08:11 +04:00
# endif
/* Thin pool size had changed. Clear the threshold. */
2012-01-19 19:21:23 +04:00
if ( state - > known_metadata_size ! = tps - > total_metadata_blocks ) {
state - > metadata_percent_check = CHECK_MINIMUM ;
state - > known_metadata_size = tps - > total_metadata_blocks ;
2017-01-21 00:53:55 +03:00
state - > fails = 0 ;
2011-12-21 17:08:11 +04:00
}
if ( state - > known_data_size ! = tps - > total_data_blocks ) {
state - > data_percent_check = CHECK_MINIMUM ;
state - > known_data_size = tps - > total_data_blocks ;
2017-01-21 00:53:55 +03:00
state - > fails = 0 ;
2011-12-21 17:08:11 +04:00
}
2017-01-21 01:07:05 +03:00
/*
* Trigger action when threshold boundary is exceeded .
* Report 80 % threshold warning when it ' s used above 80 % .
* Only 100 % is exception as it cannot be surpased so policy
* action is called for : > 50 % , > 55 % . . . > 95 % , 100 %
*/
2017-01-18 16:06:29 +03:00
state - > metadata_percent = dm_make_percent ( tps - > used_metadata_blocks , tps - > total_metadata_blocks ) ;
2017-04-19 20:57:00 +03:00
if ( ( state - > metadata_percent > WARNING_THRESH ) & &
( state - > metadata_percent > state - > metadata_percent_check ) )
2017-01-20 23:53:13 +03:00
log_warn ( " WARNING: Thin pool %s metadata is now %.2f%% full. " ,
2017-06-24 17:39:50 +03:00
device , dm_percent_to_round_float ( state - > metadata_percent , 2 ) ) ;
2017-01-21 01:07:05 +03:00
if ( state - > metadata_percent > CHECK_MINIMUM ) {
/* Run action when usage raised more than CHECK_STEP since the last time */
if ( state - > metadata_percent > state - > metadata_percent_check )
needs_policy = 1 ;
state - > metadata_percent_check = ( state - > metadata_percent / CHECK_STEP + 1 ) * CHECK_STEP ;
if ( state - > metadata_percent_check = = DM_PERCENT_100 )
state - > metadata_percent_check - - ; /* Can't get bigger then 100% */
} else
state - > metadata_percent_check = CHECK_MINIMUM ;
2011-12-21 17:08:11 +04:00
2017-01-18 16:06:29 +03:00
state - > data_percent = dm_make_percent ( tps - > used_data_blocks , tps - > total_data_blocks ) ;
2017-04-19 20:57:00 +03:00
if ( ( state - > data_percent > WARNING_THRESH ) & &
( state - > data_percent > state - > data_percent_check ) )
2017-01-20 23:53:13 +03:00
log_warn ( " WARNING: Thin pool %s data is now %.2f%% full. " ,
2017-06-24 17:39:50 +03:00
device , dm_percent_to_round_float ( state - > data_percent , 2 ) ) ;
2017-01-21 01:07:05 +03:00
if ( state - > data_percent > CHECK_MINIMUM ) {
/* Run action when usage raised more than CHECK_STEP since the last time */
if ( state - > data_percent > state - > data_percent_check )
needs_policy = 1 ;
state - > data_percent_check = ( state - > data_percent / CHECK_STEP + 1 ) * CHECK_STEP ;
if ( state - > data_percent_check = = DM_PERCENT_100 )
state - > data_percent_check - - ; /* Can't get bigger then 100% */
} else
state - > data_percent_check = CHECK_MINIMUM ;
2015-10-20 15:19:35 +03:00
2017-01-21 00:53:55 +03:00
/* Reduce number of _use_policy() calls by power-of-2 factor till frequency of MAX_FAILS is reached.
* Avoids too high number of error retries , yet shows some status messages in log regularly .
* i . e . PV could have been pvmoved and VG / LV was locked for a while . . .
*/
if ( state - > fails ) {
if ( state - > fails + + < = state - > max_fails ) {
log_debug ( " Postponing frequently failing policy (%u <= %u). " ,
state - > fails - 1 , state - > max_fails ) ;
return ;
}
if ( state - > max_fails < MAX_FAILS )
state - > max_fails < < = 1 ;
state - > fails = needs_policy = 1 ; /* Retry failing command */
} else
state - > max_fails = 1 ; /* Reset on success */
2017-01-21 01:06:45 +03:00
if ( needs_policy )
_use_policy ( dmt , state ) ;
2011-12-21 17:08:11 +04:00
out :
if ( tps )
dm_pool_free ( state - > mem , tps ) ;
2015-10-29 13:52:11 +03:00
2016-12-23 01:28:04 +03:00
if ( new_dmt )
dm_task_destroy ( new_dmt ) ;
2011-12-21 17:08:11 +04:00
}
2017-01-18 11:55:46 +03:00
/*
 * SIGCHLD handler for a thread.
 * Deliberately does nothing: it only has to exist so that the signal
 * is delivered (and can interrupt a syscall) instead of being ignored.
 */
static void _sig_child(int signum __attribute__((unused)))
{
}
/* Setup handler for SIGCHLD when executing external command
* to get quick ' waitpid ( ) ' reaction
* It will interrupt syscall just like SIGALRM and
* invoke process_event ( ) .
*/
static void _init_thread_signals ( struct dso_state * state )
{
struct sigaction act = { . sa_handler = _sig_child } ;
sigset_t my_sigset ;
sigemptyset ( & my_sigset ) ;
if ( sigaction ( SIGCHLD , & act , NULL ) )
log_warn ( " WARNING: Failed to set SIGCHLD action. " ) ;
else if ( sigaddset ( & my_sigset , SIGCHLD ) )
log_warn ( " WARNING: Failed to add SIGCHLD to set. " ) ;
else if ( pthread_sigmask ( SIG_UNBLOCK , & my_sigset , & state - > old_sigset ) )
log_warn ( " WARNING: Failed to unblock SIGCHLD. " ) ;
else
state - > restore_sigset = 1 ;
}
static void _restore_thread_signals ( struct dso_state * state )
{
if ( state - > restore_sigset & &
pthread_sigmask ( SIG_SETMASK , & state - > old_sigset , NULL ) )
log_warn ( " WARNING: Failed to block SIGCHLD. " ) ;
}
2011-12-21 17:08:11 +04:00
int register_device ( const char * device ,
const char * uuid __attribute__ ( ( unused ) ) ,
int major __attribute__ ( ( unused ) ) ,
int minor __attribute__ ( ( unused ) ) ,
2015-10-12 12:40:51 +03:00
void * * user )
2011-12-21 17:08:11 +04:00
{
struct dso_state * state ;
2017-01-20 23:50:23 +03:00
char * str ;
2017-02-13 00:07:23 +03:00
char cmd_str [ PATH_MAX + 128 + 2 ] ; /* cmd ' ' vg/lv \0 */
2011-12-21 17:08:11 +04:00
2015-10-13 12:30:37 +03:00
if ( ! dmeventd_lvm2_init_with_pool ( " thin_pool_state " , state ) )
goto_bad ;
2011-12-22 19:57:29 +04:00
2017-02-13 00:07:23 +03:00
if ( ! dmeventd_lvm2_command ( state - > mem , cmd_str , sizeof ( cmd_str ) ,
2017-02-13 21:00:59 +03:00
" _dmeventd_thin_command " , device ) )
2015-10-13 12:30:37 +03:00
goto_bad ;
2011-12-21 17:08:11 +04:00
2017-02-13 00:07:23 +03:00
if ( strncmp ( cmd_str , " lvm " , 4 ) = = 0 ) {
if ( ! ( state - > cmd_str = dm_pool_strdup ( state - > mem , cmd_str + 4 ) ) ) {
log_error ( " Failed to copy lvm command. " ) ;
goto bad ;
}
} else if ( cmd_str [ 0 ] = = ' / ' ) {
if ( ! ( state - > cmd_str = dm_pool_strdup ( state - > mem , cmd_str ) ) ) {
log_error ( " Failed to copy thin command. " ) ;
2017-01-20 23:50:23 +03:00
goto bad ;
}
2017-02-13 00:07:23 +03:00
/* Find last space before 'vg/lv' */
if ( ! ( str = strrchr ( state - > cmd_str , ' ' ) ) )
goto inval ;
if ( ! ( state - > argv [ 0 ] = dm_pool_strndup ( state - > mem , state - > cmd_str ,
str - state - > cmd_str ) ) ) {
log_error ( " Failed to copy command. " ) ;
goto bad ;
}
state - > argv [ 1 ] = str + 1 ; /* 1 argument - vg/lv */
2017-01-18 11:55:46 +03:00
_init_thread_signals ( state ) ;
2017-02-13 00:07:23 +03:00
} else /* Unuspported command format */
goto inval ;
2017-01-20 23:50:23 +03:00
2017-01-20 23:42:55 +03:00
state - > pid = - 1 ;
2015-10-12 12:40:51 +03:00
* user = state ;
2011-12-21 17:08:11 +04:00
2016-08-31 12:05:39 +03:00
log_info ( " Monitoring thin pool %s. " , device ) ;
2011-12-21 17:08:11 +04:00
return 1 ;
2017-02-13 00:07:23 +03:00
inval :
log_error ( " Invalid command for monitoring: %s. " , cmd_str ) ;
2011-12-22 19:57:29 +04:00
bad :
2016-08-31 12:05:39 +03:00
log_error ( " Failed to monitor thin pool %s. " , device ) ;
2011-12-22 19:57:29 +04:00
2017-02-13 21:00:59 +03:00
if ( state )
dmeventd_lvm2_exit_with_pool ( state ) ;
2011-12-22 19:57:29 +04:00
return 0 ;
2011-12-21 17:08:11 +04:00
}
int unregister_device ( const char * device ,
const char * uuid __attribute__ ( ( unused ) ) ,
int major __attribute__ ( ( unused ) ) ,
int minor __attribute__ ( ( unused ) ) ,
2015-10-12 12:40:51 +03:00
void * * user )
2011-12-21 17:08:11 +04:00
{
2015-10-12 12:40:51 +03:00
struct dso_state * state = * user ;
2017-01-20 23:42:55 +03:00
int i ;
for ( i = 0 ; ! _wait_for_pid ( state ) & & ( i < 6 ) ; + + i ) {
if ( i = = 0 )
/* Give it 2 seconds, then try to terminate & kill it */
log_verbose ( " Child %d still not finished (%s) waiting. " ,
state - > pid , state - > cmd_str ) ;
else if ( i = = 3 ) {
log_warn ( " WARNING: Terminating child %d. " , state - > pid ) ;
kill ( state - > pid , SIGINT ) ;
kill ( state - > pid , SIGTERM ) ;
} else if ( i = = 5 ) {
log_warn ( " WARNING: Killing child %d. " , state - > pid ) ;
kill ( state - > pid , SIGKILL ) ;
}
sleep ( 1 ) ;
}
if ( state - > pid ! = - 1 )
log_warn ( " WARNING: Cannot kill child %d! " , state - > pid ) ;
2011-12-21 17:08:11 +04:00
2017-01-18 11:55:46 +03:00
_restore_thread_signals ( state ) ;
2015-10-13 12:30:37 +03:00
dmeventd_lvm2_exit_with_pool ( state ) ;
2016-08-31 12:05:39 +03:00
log_info ( " No longer monitoring thin pool %s. " , device ) ;
2011-12-21 17:08:11 +04:00
return 1 ;
}