2011-12-21 13:08:11 +00:00
/*
2017-01-20 21:41:23 +01:00
* Copyright ( C ) 2011 - 2017 Red Hat , Inc . All rights reserved .
2011-12-21 13:08:11 +00:00
*
* This file is part of LVM2 .
*
* This copyrighted material is made available to anyone wishing to use ,
* modify , copy , or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License v .2 .1 .
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program ; if not , write to the Free Software Foundation ,
2016-01-21 11:49:46 +01:00
* Inc . , 51 Franklin Street , Fifth Floor , Boston , MA 02110 - 1301 USA
2011-12-21 13:08:11 +00:00
*/
2015-10-10 16:58:31 +02:00
# include "lib.h" /* using here lvm log */
2011-12-21 13:08:11 +00:00
# include "dmeventd_lvm.h"
2015-10-10 16:58:31 +02:00
# include "libdevmapper-event.h"
2011-12-21 13:08:11 +00:00
# include <sys/wait.h>
2014-04-15 13:27:47 +02:00
# include <stdarg.h>
2015-10-29 11:52:11 +01:00
# include <pthread.h>
2011-12-21 13:08:11 +00:00
2015-10-12 11:35:33 +02:00
/* TODO - move this mountinfo code into library to be reusable */
# ifdef __linux__
# include "kdev_t.h"
# else
# define MAJOR(x) major((x))
# define MINOR(x) minor((x))
# endif
/* First warning when thin data or metadata is 80% full. */
2015-10-20 14:19:35 +02:00
# define WARNING_THRESH (DM_PERCENT_1 * 80)
2016-06-29 15:56:29 +02:00
/* Umount thin LVs when thin data or metadata LV is >= 95% full
 * and lvextend --use-policies has failed. */
# define UMOUNT_THRESH (DM_PERCENT_1 * 95)
2011-12-21 13:08:11 +00:00
/* Run a check every 5%. */
2015-10-20 14:19:35 +02:00
# define CHECK_STEP (DM_PERCENT_1 * 5)
2015-10-12 11:35:33 +02:00
/* Do not bother checking while thin data or metadata is less than 50% full. */
2015-10-20 14:19:35 +02:00
# define CHECK_MINIMUM (DM_PERCENT_1 * 50)
2011-12-21 13:08:11 +00:00
# define UMOUNT_COMMAND " / bin / umount"
2017-01-20 22:53:55 +01:00
# define MAX_FAILS (256) /* ~42 mins between cmd call retry with 10s delay */
2015-10-29 11:52:11 +01:00
2011-12-21 13:08:11 +00:00
# define THIN_DEBUG 0
/*
 * Per-device monitoring state, handed back to the caller through '*user'
 * in register_device() and received again in process_event() and
 * unregister_device().
 */
struct dso_state {
	/* Pool used for the parsed status and for the command/argv copies */
	struct dm_pool *mem;

	/* Metadata usage: next threshold that triggers the policy,
	 * last computed usage, and the "already warned" latch */
	int metadata_percent_check;
	int metadata_percent;
	int metadata_warn_once;

	/* Data usage: same trio as for metadata */
	int data_percent_check;
	int data_percent;
	int data_warn_once;

	/* Last seen total block counts; a change resets thresholds and fails */
	uint64_t known_metadata_size;
	uint64_t known_data_size;

	/* Non-zero after a failed command; counts events skipped since then */
	unsigned fails;
	/* Number of events to skip before retrying; doubled up to MAX_FAILS */
	unsigned max_fails;

	/* Set once old_sigset holds the mask to restore on unregister */
	int restore_sigset;
	sigset_t old_sigset;

	/* Child running the external command, or -1 when none is tracked */
	pid_t pid;
	/* Split words of cmd_str for execve(); left unset when the
	 * command is run via dmeventd_lvm2_run_with_lock() instead */
	char **argv;
	/* Full command line as produced by dmeventd_lvm2_command() */
	char cmd_str[1024];
};
2015-10-10 16:58:31 +02:00
DM_EVENT_LOG_FN ( " thin " )
2016-06-29 15:56:29 +02:00
# define UUID_PREFIX "LVM-"
2017-01-20 21:41:23 +01:00
static int _run_command ( struct dso_state * state )
{
char val [ 2 ] [ 36 ] ;
char * env [ ] = { val [ 0 ] , val [ 1 ] , NULL } ;
int i ;
if ( state - > data_percent ) {
/* Prepare some known data to env vars for easy use */
( void ) dm_snprintf ( val [ 0 ] , sizeof ( val [ 0 ] ) , " DMEVENTD_THIN_POOL_DATA=%d " ,
state - > data_percent / DM_PERCENT_1 ) ;
( void ) dm_snprintf ( val [ 1 ] , sizeof ( val [ 1 ] ) , " DMEVENTD_THIN_POOL_METADATA=%d " ,
state - > metadata_percent / DM_PERCENT_1 ) ;
} else {
/* For an error event it's for a user to check status and decide */
env [ 0 ] = NULL ;
log_debug ( " Error event processing " ) ;
}
log_verbose ( " Executing command: %s " , state - > cmd_str ) ;
/* TODO:
* Support parallel run of ' task ' and it ' s waitpid maintainence
* ATM we can ' t handle signaling of SIGALRM
* as signalling is not allowed while ' process_event ( ) ' is running
*/
if ( ! ( state - > pid = fork ( ) ) ) {
/* child */
( void ) close ( 0 ) ;
for ( i = 3 ; i < 255 ; + + i ) ( void ) close ( i ) ;
execve ( state - > argv [ 0 ] , state - > argv , env ) ;
_exit ( errno ) ;
} else if ( state - > pid = = - 1 ) {
log_error ( " Can't fork command %s. " , state - > cmd_str ) ;
state - > fails = 1 ;
return 0 ;
}
return 1 ;
}
2016-06-29 15:56:29 +02:00
static int _use_policy ( struct dm_task * dmt , struct dso_state * state )
2015-10-20 14:19:35 +02:00
{
# if THIN_DEBUG
2016-11-01 11:02:01 +01:00
log_debug ( " dmeventd executes: %s. " , state - > cmd_str ) ;
2015-10-20 14:19:35 +02:00
# endif
2017-01-20 21:41:23 +01:00
if ( state - > argv )
return _run_command ( state ) ;
2015-10-20 14:19:35 +02:00
if ( ! dmeventd_lvm2_run_with_lock ( state - > cmd_str ) ) {
2017-01-20 21:41:23 +01:00
log_error ( " Failed command for %s. " , dm_task_get_name ( dmt ) ) ;
state - > fails = 1 ;
2016-06-29 15:56:29 +02:00
return 0 ;
}
state - > fails = 0 ;
2017-01-20 21:41:23 +01:00
2016-06-29 15:56:29 +02:00
return 1 ;
2015-10-20 14:19:35 +02:00
}
2017-01-20 21:42:55 +01:00
/* Check if executed command has finished
* Only 1 command may run */
static int _wait_for_pid ( struct dso_state * state )
{
int status = 0 ;
if ( state - > pid = = - 1 )
return 1 ;
if ( ! waitpid ( state - > pid , & status , WNOHANG ) )
return 0 ;
/* Wait for finish */
if ( WIFEXITED ( status ) ) {
log_verbose ( " Child %d exited with status %d. " ,
state - > pid , WEXITSTATUS ( status ) ) ;
state - > fails = WEXITSTATUS ( status ) ? 1 : 0 ;
} else {
if ( WIFSIGNALED ( status ) )
log_verbose ( " Child %d was terminated with status %d. " ,
state - > pid , WTERMSIG ( status ) ) ;
state - > fails = 1 ;
}
state - > pid = - 1 ;
return 1 ;
}
2011-12-21 13:08:11 +00:00
void process_event ( struct dm_task * dmt ,
enum dm_event_mask event __attribute__ ( ( unused ) ) ,
2015-10-12 11:40:51 +02:00
void * * user )
2011-12-21 13:08:11 +00:00
{
const char * device = dm_task_get_name ( dmt ) ;
2015-10-12 11:40:51 +02:00
struct dso_state * state = * user ;
2012-01-20 10:59:26 +00:00
struct dm_status_thin_pool * tps = NULL ;
2011-12-21 13:08:11 +00:00
void * next = NULL ;
uint64_t start , length ;
char * target_type = NULL ;
char * params ;
2015-10-20 14:19:35 +02:00
int needs_policy = 0 ;
2016-12-22 23:28:04 +01:00
struct dm_task * new_dmt = NULL ;
2016-06-29 15:56:29 +02:00
# if THIN_DEBUG
log_debug ( " Watch for tp-data:%.2f%% tp-metadata:%.2f%%. " ,
dm_percent_to_float ( state - > data_percent_check ) ,
dm_percent_to_float ( state - > metadata_percent_check ) ) ;
# endif
2017-01-20 21:42:55 +01:00
if ( ! _wait_for_pid ( state ) ) {
log_warn ( " WARNING: Skipping event, child %d is still running (%s). " ,
state - > pid , state - > cmd_str ) ;
return ;
}
2011-12-21 13:08:11 +00:00
2015-10-20 14:19:35 +02:00
if ( event & DM_EVENT_DEVICE_ERROR ) {
/* Error -> no need to check and do instant resize */
2017-01-18 14:06:29 +01:00
state - > data_percent = state - > metadata_percent = 0 ;
2016-06-29 15:56:29 +02:00
if ( _use_policy ( dmt , state ) )
goto out ;
stack ;
2016-12-22 23:28:04 +01:00
/*
* Rather update oldish status
* since after ' command ' processing
* percentage info could have changed a lot .
* If we would get above UMOUNT_THRESH
* we would wait for next sigalarm .
*/
if ( ! ( new_dmt = dm_task_create ( DM_DEVICE_STATUS ) ) )
goto_out ;
if ( ! dm_task_set_uuid ( new_dmt , dm_task_get_uuid ( dmt ) ) )
goto_out ;
/* Non-blocking status read */
if ( ! dm_task_no_flush ( new_dmt ) )
log_warn ( " WARNING: Can't set no_flush for dm status. " ) ;
if ( ! dm_task_run ( new_dmt ) )
goto_out ;
dmt = new_dmt ;
2015-10-20 14:19:35 +02:00
}
2011-12-21 13:08:11 +00:00
dm_get_next_target ( dmt , next , & start , & length , & target_type , & params ) ;
if ( ! target_type | | ( strcmp ( target_type , " thin-pool " ) ! = 0 ) ) {
2015-10-09 21:57:48 +02:00
log_error ( " Invalid target type. " ) ;
2011-12-21 13:08:11 +00:00
goto out ;
}
if ( ! dm_get_status_thin_pool ( state - > mem , params , & tps ) ) {
2015-10-09 21:57:48 +02:00
log_error ( " Failed to parse status. " ) ;
2011-12-21 13:08:11 +00:00
goto out ;
}
# if THIN_DEBUG
2015-10-20 14:19:35 +02:00
log_debug ( " Thin pool status " FMTu64 " / " FMTu64 " "
FMTu64 " / " FMTu64 " . " ,
2015-10-09 21:57:48 +02:00
tps - > used_metadata_blocks , tps - > total_metadata_blocks ,
tps - > used_data_blocks , tps - > total_data_blocks ) ;
2011-12-21 13:08:11 +00:00
# endif
/* Thin pool size had changed. Clear the threshold. */
2012-01-19 15:21:23 +00:00
if ( state - > known_metadata_size ! = tps - > total_metadata_blocks ) {
state - > metadata_percent_check = CHECK_MINIMUM ;
state - > known_metadata_size = tps - > total_metadata_blocks ;
2017-01-20 22:53:55 +01:00
state - > fails = 0 ;
2011-12-21 13:08:11 +00:00
}
if ( state - > known_data_size ! = tps - > total_data_blocks ) {
state - > data_percent_check = CHECK_MINIMUM ;
state - > known_data_size = tps - > total_data_blocks ;
2017-01-20 22:53:55 +01:00
state - > fails = 0 ;
2011-12-21 13:08:11 +00:00
}
2017-01-20 23:07:05 +01:00
/*
* Trigger action when threshold boundary is exceeded .
* Report 80 % threshold warning when it ' s used above 80 % .
* Only 100 % is exception as it cannot be surpased so policy
* action is called for : > 50 % , > 55 % . . . > 95 % , 100 %
*/
2017-01-18 14:06:29 +01:00
state - > metadata_percent = dm_make_percent ( tps - > used_metadata_blocks , tps - > total_metadata_blocks ) ;
2017-01-20 21:53:13 +01:00
if ( state - > metadata_percent < = WARNING_THRESH )
state - > metadata_warn_once = 0 ; /* Dropped bellow threshold, reset warn once */
else if ( ! state - > metadata_warn_once + + ) /* Warn once when raised above threshold */
log_warn ( " WARNING: Thin pool %s metadata is now %.2f%% full. " ,
device , dm_percent_to_float ( state - > metadata_percent ) ) ;
2017-01-20 23:07:05 +01:00
if ( state - > metadata_percent > CHECK_MINIMUM ) {
/* Run action when usage raised more than CHECK_STEP since the last time */
if ( state - > metadata_percent > state - > metadata_percent_check )
needs_policy = 1 ;
state - > metadata_percent_check = ( state - > metadata_percent / CHECK_STEP + 1 ) * CHECK_STEP ;
if ( state - > metadata_percent_check = = DM_PERCENT_100 )
state - > metadata_percent_check - - ; /* Can't get bigger then 100% */
} else
state - > metadata_percent_check = CHECK_MINIMUM ;
2011-12-21 13:08:11 +00:00
2017-01-18 14:06:29 +01:00
state - > data_percent = dm_make_percent ( tps - > used_data_blocks , tps - > total_data_blocks ) ;
2017-01-20 21:53:13 +01:00
if ( state - > data_percent < = WARNING_THRESH )
state - > data_warn_once = 0 ;
else if ( ! state - > data_warn_once + + )
log_warn ( " WARNING: Thin pool %s data is now %.2f%% full. " ,
device , dm_percent_to_float ( state - > data_percent ) ) ;
2017-01-20 23:07:05 +01:00
if ( state - > data_percent > CHECK_MINIMUM ) {
/* Run action when usage raised more than CHECK_STEP since the last time */
if ( state - > data_percent > state - > data_percent_check )
needs_policy = 1 ;
state - > data_percent_check = ( state - > data_percent / CHECK_STEP + 1 ) * CHECK_STEP ;
if ( state - > data_percent_check = = DM_PERCENT_100 )
state - > data_percent_check - - ; /* Can't get bigger then 100% */
} else
state - > data_percent_check = CHECK_MINIMUM ;
2015-10-20 14:19:35 +02:00
2017-01-20 22:53:55 +01:00
/* Reduce number of _use_policy() calls by power-of-2 factor till frequency of MAX_FAILS is reached.
* Avoids too high number of error retries , yet shows some status messages in log regularly .
* i . e . PV could have been pvmoved and VG / LV was locked for a while . . .
*/
if ( state - > fails ) {
if ( state - > fails + + < = state - > max_fails ) {
log_debug ( " Postponing frequently failing policy (%u <= %u). " ,
state - > fails - 1 , state - > max_fails ) ;
return ;
}
if ( state - > max_fails < MAX_FAILS )
state - > max_fails < < = 1 ;
state - > fails = needs_policy = 1 ; /* Retry failing command */
} else
state - > max_fails = 1 ; /* Reset on success */
2017-01-20 23:06:45 +01:00
if ( needs_policy )
_use_policy ( dmt , state ) ;
2011-12-21 13:08:11 +00:00
out :
if ( tps )
dm_pool_free ( state - > mem , tps ) ;
2015-10-29 11:52:11 +01:00
2016-12-22 23:28:04 +01:00
if ( new_dmt )
dm_task_destroy ( new_dmt ) ;
2011-12-21 13:08:11 +00:00
}
2017-01-18 09:55:46 +01:00
/*
 * SIGCHLD handler installed by _init_thread_signals().
 * It deliberately does nothing; it exists only so that a handler
 * (rather than the default disposition) is registered for the signal.
 */
static void _sig_child(int signum)
{
	(void) signum;
}
/* Setup handler for SIGCHLD when executing external command
* to get quick ' waitpid ( ) ' reaction
* It will interrupt syscall just like SIGALRM and
* invoke process_event ( ) .
*/
static void _init_thread_signals ( struct dso_state * state )
{
struct sigaction act = { . sa_handler = _sig_child } ;
sigset_t my_sigset ;
sigemptyset ( & my_sigset ) ;
if ( sigaction ( SIGCHLD , & act , NULL ) )
log_warn ( " WARNING: Failed to set SIGCHLD action. " ) ;
else if ( sigaddset ( & my_sigset , SIGCHLD ) )
log_warn ( " WARNING: Failed to add SIGCHLD to set. " ) ;
else if ( pthread_sigmask ( SIG_UNBLOCK , & my_sigset , & state - > old_sigset ) )
log_warn ( " WARNING: Failed to unblock SIGCHLD. " ) ;
else
state - > restore_sigset = 1 ;
}
static void _restore_thread_signals ( struct dso_state * state )
{
if ( state - > restore_sigset & &
pthread_sigmask ( SIG_SETMASK , & state - > old_sigset , NULL ) )
log_warn ( " WARNING: Failed to block SIGCHLD. " ) ;
}
2011-12-21 13:08:11 +00:00
int register_device ( const char * device ,
const char * uuid __attribute__ ( ( unused ) ) ,
int major __attribute__ ( ( unused ) ) ,
int minor __attribute__ ( ( unused ) ) ,
2015-10-12 11:40:51 +02:00
void * * user )
2011-12-21 13:08:11 +00:00
{
struct dso_state * state ;
2017-01-20 21:50:23 +01:00
int maxcmd ;
char * str ;
2011-12-21 13:08:11 +00:00
2015-10-13 11:30:37 +02:00
if ( ! dmeventd_lvm2_init_with_pool ( " thin_pool_state " , state ) )
goto_bad ;
2011-12-22 15:57:29 +00:00
2015-10-13 11:30:37 +02:00
if ( ! dmeventd_lvm2_command ( state - > mem , state - > cmd_str ,
2011-12-22 15:57:29 +00:00
sizeof ( state - > cmd_str ) ,
" lvextend --use-policies " ,
device ) ) {
2015-10-13 11:30:37 +02:00
dmeventd_lvm2_exit_with_pool ( state ) ;
goto_bad ;
2011-12-21 13:08:11 +00:00
}
2017-01-20 21:50:23 +01:00
if ( strncmp ( state - > cmd_str , " lvm " , 4 ) ) {
maxcmd = 2 ; /* space for last NULL element */
for ( str = state - > cmd_str ; * str ; str + + )
if ( * str = = ' ' )
maxcmd + + ;
if ( ! ( str = dm_pool_strdup ( state - > mem , state - > cmd_str ) ) | |
! ( state - > argv = dm_pool_zalloc ( state - > mem , maxcmd * sizeof ( char * ) ) ) ) {
log_error ( " Failed to allocate memory for command. " ) ;
goto bad ;
}
dm_split_words ( str , maxcmd - 1 , 0 , state - > argv ) ;
2017-01-18 09:55:46 +01:00
_init_thread_signals ( state ) ;
2017-01-20 21:50:23 +01:00
}
2017-01-20 21:42:55 +01:00
state - > pid = - 1 ;
2015-10-12 11:40:51 +02:00
* user = state ;
2011-12-21 13:08:11 +00:00
2016-08-31 11:05:39 +02:00
log_info ( " Monitoring thin pool %s. " , device ) ;
2011-12-21 13:08:11 +00:00
return 1 ;
2011-12-22 15:57:29 +00:00
bad :
2016-08-31 11:05:39 +02:00
log_error ( " Failed to monitor thin pool %s. " , device ) ;
2011-12-22 15:57:29 +00:00
return 0 ;
2011-12-21 13:08:11 +00:00
}
int unregister_device ( const char * device ,
const char * uuid __attribute__ ( ( unused ) ) ,
int major __attribute__ ( ( unused ) ) ,
int minor __attribute__ ( ( unused ) ) ,
2015-10-12 11:40:51 +02:00
void * * user )
2011-12-21 13:08:11 +00:00
{
2015-10-12 11:40:51 +02:00
struct dso_state * state = * user ;
2017-01-20 21:42:55 +01:00
int i ;
for ( i = 0 ; ! _wait_for_pid ( state ) & & ( i < 6 ) ; + + i ) {
if ( i = = 0 )
/* Give it 2 seconds, then try to terminate & kill it */
log_verbose ( " Child %d still not finished (%s) waiting. " ,
state - > pid , state - > cmd_str ) ;
else if ( i = = 3 ) {
log_warn ( " WARNING: Terminating child %d. " , state - > pid ) ;
kill ( state - > pid , SIGINT ) ;
kill ( state - > pid , SIGTERM ) ;
} else if ( i = = 5 ) {
log_warn ( " WARNING: Killing child %d. " , state - > pid ) ;
kill ( state - > pid , SIGKILL ) ;
}
sleep ( 1 ) ;
}
if ( state - > pid ! = - 1 )
log_warn ( " WARNING: Cannot kill child %d! " , state - > pid ) ;
2011-12-21 13:08:11 +00:00
2017-01-18 09:55:46 +01:00
_restore_thread_signals ( state ) ;
2015-10-13 11:30:37 +02:00
dmeventd_lvm2_exit_with_pool ( state ) ;
2016-08-31 11:05:39 +02:00
log_info ( " No longer monitoring thin pool %s. " , device ) ;
2011-12-21 13:08:11 +00:00
return 1 ;
}