5
0
mirror of git://git.proxmox.com/git/pve-ha-manager.git synced 2025-01-06 17:18:00 +03:00

LRM: factor out closing watchdog local helper

It's not much but repeated a few times, and as a next commit will add
another such time let's just refactor it to a local private helper
with a very explicit name and comment about what implications calling
it has.

Take the chance and add some more safety comments too.

Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
This commit is contained in:
Thomas Lamprecht 2021-07-01 15:56:37 +02:00
parent c259b1a814
commit abc1499bc6

View File

@ -262,6 +262,17 @@ sub do_one_iteration {
return $res; return $res;
} }
# Close the LRM's HA agent watchdog (if one is open) and forget its handle.
#
# WARNING: closing the watchdog switches off the self-fence mechanism, so this
# must NOT be called while services are still active. It is normally *only*
# acceptable on a graceful shutdown, i.e. with no services at all or with all
# services frozen.
#
# Takes the LRM object as its single argument; a no-op when no watchdog handle
# is currently stored in it.
my sub give_up_watchdog_protection {
    my ($lrm) = @_;

    my $watchdog = $lrm->{ha_agent_wd};
    return if !$watchdog; # nothing open, nothing to give up

    $lrm->{haenv}->watchdog_close($watchdog);

    # drop the handle only once the close call above has returned
    delete $lrm->{ha_agent_wd};
}
sub work { sub work {
my ($self) = @_; my ($self) = @_;
@ -362,13 +373,9 @@ sub work {
my $service_count = $self->active_service_count(); my $service_count = $self->active_service_count();
if ($service_count == 0) { if ($service_count == 0) {
if ($self->run_workers() == 0) { if ($self->run_workers() == 0) {
if ($self->{ha_agent_wd}) { # safety: no active services or workers -> OK
$haenv->watchdog_close($self->{ha_agent_wd}); give_up_watchdog_protection($self);
delete $self->{ha_agent_wd};
}
$shutdown = 1; $shutdown = 1;
# restart with no or freezed services, release the lock # restart with no or freezed services, release the lock
@ -379,10 +386,8 @@ sub work {
if ($self->run_workers() == 0) { if ($self->run_workers() == 0) {
if ($self->{shutdown_errors} == 0) { if ($self->{shutdown_errors} == 0) {
if ($self->{ha_agent_wd}) { # safety: no active services and LRM shutdown -> OK
$haenv->watchdog_close($self->{ha_agent_wd}); give_up_watchdog_protection($self);
delete $self->{ha_agent_wd};
}
# shutdown with all services stopped thus release the lock # shutdown with all services stopped thus release the lock
$haenv->release_ha_agent_lock(); $haenv->release_ha_agent_lock();
@ -416,10 +421,8 @@ sub work {
} elsif ($state eq 'lost_agent_lock') { } elsif ($state eq 'lost_agent_lock') {
# Note: watchdog is active an will triger soon! # NOTE: watchdog is active an will trigger soon!
# so we hope to get the lock back soon! # so we hope to get the lock back soon!
if ($self->{shutdown_request}) { if ($self->{shutdown_request}) {
my $service_count = $self->active_service_count(); my $service_count = $self->active_service_count();
@ -441,13 +444,8 @@ sub work {
} }
} }
} else { } else {
# safety: all services are stopped, so we can close the watchdog
# all services are stopped, so we can close the watchdog give_up_watchdog_protection($self);
if ($self->{ha_agent_wd}) {
$haenv->watchdog_close($self->{ha_agent_wd});
delete $self->{ha_agent_wd};
}
return 0; return 0;
} }
@ -467,10 +465,8 @@ sub work {
if ($self->{shutdown_request}) { if ($self->{shutdown_request}) {
if ($service_count == 0 && $self->run_workers() == 0) { if ($service_count == 0 && $self->run_workers() == 0) {
if ($self->{ha_agent_wd}) { # safety: going into maintenance and all active services got moved -> OK
$haenv->watchdog_close($self->{ha_agent_wd}); give_up_watchdog_protection($self);
delete $self->{ha_agent_wd};
}
$exit_lrm = 1; $exit_lrm = 1;