From 618fbeda143db65d5e3e7fbe48ed8ab4b9ef4bd0 Mon Sep 17 00:00:00 2001 From: Dietmar Maurer Date: Sat, 14 Feb 2015 11:52:35 +0100 Subject: [PATCH] improve CRM state transitions --- PVE/HA/Manager.pm | 29 ++++++++++++++++++++++++++++- PVE/HA/Sim/RTEnv.pm | 16 +++------------- README | 16 ++++++++++++++++ 3 files changed, 47 insertions(+), 14 deletions(-) diff --git a/PVE/HA/Manager.pm b/PVE/HA/Manager.pm index c27bff1..bc7adf7 100644 --- a/PVE/HA/Manager.pm +++ b/PVE/HA/Manager.pm @@ -66,6 +66,15 @@ sub select_service_node { my $uid_counter = 0; +my $valid_service_states = { + stopped => 1, + request_stop => 1, + started => 1, + fence => 1, + migrate => 1, + error => 1, +}; + my $change_service_state = sub { my ($self, $sid, $new_state, %params) = @_; @@ -77,6 +86,8 @@ my $change_service_state = sub { die "no state change" if $old_state eq $new_state; # just to be sure + die "invalid CRM service state '$new_state'\n" if !$valid_service_states->{$new_state}; + my $changes = ''; foreach my $k (keys %params) { my $v = $params{$k}; @@ -199,13 +210,29 @@ sub manage { } elsif ($last_state eq 'request_stop') { -#fixme: die "implement me"; + # do nothing here } else { die "unknown service state '$last_state'"; } + # check results from LRM daemons + my $lrm_res = $sd->{uid} ? $lrm_status->{$sd->{uid}} : undef; + if ($lrm_res) { + my $exit_code = $lrm_res->{exit_code}; + + if ($sd->{state} eq 'request_stop') { + if ($exit_code == 0) { + &$change_service_state($self, $sid, 'stopped'); + } else { + &$change_service_state($self, $sid, 'error'); # fixme: what state? 
+ } + } elsif ($sd->{state} eq 'started') { + + } + + } $repeat = 1 if $sd->{state} ne $last_state; } diff --git a/PVE/HA/Sim/RTEnv.pm b/PVE/HA/Sim/RTEnv.pm index 6fa8e10..580223b 100644 --- a/PVE/HA/Sim/RTEnv.pm +++ b/PVE/HA/Sim/RTEnv.pm @@ -78,17 +78,7 @@ sub exec_resource_agent { my $ss = $hardware->read_service_status(); - if ($cmd eq 'request_stop') { - - if (!$ss->{$sid}) { - print "WORKER status $sid: stopped\n"; - return 0; - } else { - print "WORKER status $sid: running\n"; - return 1; - } - - } elsif ($cmd eq 'start') { + if ($cmd eq 'started') { if ($ss->{$sid}) { print "WORKER status $sid: running\n"; @@ -105,7 +95,7 @@ sub exec_resource_agent { return 0; - } elsif ($cmd eq 'stop') { + } elsif ($cmd eq 'request_stop' || $cmd eq 'stopped') { if (!$ss->{$sid}) { print "WORKER status $sid: stopped\n"; @@ -123,7 +113,7 @@ sub exec_resource_agent { return 0; } - die "implement me"; + die "implement me (cmd '$cmd')"; } 1; diff --git a/README b/README index e8f88af..88ca2e7 100644 --- a/README +++ b/README @@ -66,6 +66,22 @@ service states by writing the global 'manager_status'. That data structure is read by the Local Resource Manager, which performs the real work (start/stop/migrate) services. +=== Possible CRM Service States === + +stopped: Service is stopped (confirmed by LRM) + +request_stop: Service should be stopped. Waiting for + confirmation from LRM. + +started: Service is active and LRM should start it asap. + +fence: Wait for node fencing (service node is not inside + quorate cluster partition). + +migrate: Migrate VM to other node + +error: Service disabled because of LRM errors. + == Local Resource Manager (class PVE::HA::LRM) == The Local Resource Manager (LRM) daemon runs one each node, and