mirror of
https://github.com/OpenNebula/one.git
synced 2025-03-29 18:50:08 +03:00
Improve rescue of VMs when a node failure is detected
* Introduce new LCM states PROLOG_MIGRATE_UNKNOWN and PROLOG_MIGRATE_UNKNOWN_FAILURE * Change the VM migrate logic so that, when state is ACTIVE and lcm_state is UNKNOWN, the TM's PROLOG_MIGR action is called before the VMM's BOOT. All core TM drivers that are not empty skip disks, so there is no impact on the default behaviour. Datastore addon drivers that implement access to raw block devices should check whether LCM_STATE == 60 (PROLOG_MIGRATE_UNKNOWN) and, if so, remove block device access from the failed node and provide access to the current node. A simple script function is added to get LCM_STATE; it can be used as follows: ```bash LCM_STATE=$(lcm_state) if [ "$LCM_STATE" = "60" ]; then ... fi ``` (cherry picked from commit 676f36e0aa4f6ca705f60b826fd52d69888d7bb9)
This commit is contained in:
parent
d2b66f0ec5
commit
f9375eccab
@ -166,7 +166,9 @@ public:
|
||||
DISK_SNAPSHOT_DELETE_SUSPENDED = 56,
|
||||
DISK_SNAPSHOT = 57,
|
||||
DISK_SNAPSHOT_REVERT = 58,
|
||||
DISK_SNAPSHOT_DELETE = 59
|
||||
DISK_SNAPSHOT_DELETE = 59,
|
||||
PROLOG_MIGRATE_UNKNOWN = 60,
|
||||
PROLOG_MIGRATE_UNKNOWN_FAILURE = 61
|
||||
};
|
||||
|
||||
static int lcm_state_from_str(string& st, LcmState& state)
|
||||
@ -231,6 +233,8 @@ public:
|
||||
else if ( st == "DISK_SNAPSHOT") { state = DISK_SNAPSHOT; }
|
||||
else if ( st == "DISK_SNAPSHOT_REVERT") { state = DISK_SNAPSHOT_REVERT; }
|
||||
else if ( st == "DISK_SNAPSHOT_DELETE") { state = DISK_SNAPSHOT_DELETE; }
|
||||
else if ( st == "PROLOG_MIGRATE_UNKNOWN") { state = PROLOG_MIGRATE_UNKNOWN; }
|
||||
else if ( st == "PROLOG_MIGRATE_UNKNOWN_FAILURE") { state = PROLOG_MIGRATE_UNKNOWN_FAILURE; }
|
||||
else {return -1;}
|
||||
|
||||
return 0;
|
||||
@ -298,6 +302,8 @@ public:
|
||||
case DISK_SNAPSHOT: st = "DISK_SNAPSHOT"; break;
|
||||
case DISK_SNAPSHOT_REVERT: st = "DISK_SNAPSHOT_REVERT"; break;
|
||||
case DISK_SNAPSHOT_DELETE: st = "DISK_SNAPSHOT_DELETE"; break;
|
||||
case PROLOG_MIGRATE_UNKNOWN: st = "PROLOG_MIGRATE_UNKNOWN"; break;
|
||||
case PROLOG_MIGRATE_UNKNOWN_FAILURE: st = "PROLOG_MIGRATE_UNKNOWN_FAILURE"; break;
|
||||
}
|
||||
|
||||
return st;
|
||||
|
@ -71,6 +71,8 @@ digraph OpenNebula {
|
||||
prolog_migrate_suspend;
|
||||
prolog_migrate_suspend_failure;
|
||||
prolog_undeploy;
|
||||
prolog_migrate_unknown;
|
||||
prolog_migrate_unknown_failure;
|
||||
color="white"
|
||||
}
|
||||
subgraph {
|
||||
@ -154,7 +156,8 @@ digraph OpenNebula {
|
||||
prolog_migrate -> boot_migrate [style="dashed", color="blue"];
|
||||
boot_migrate -> running [style="dashed", color="blue"];
|
||||
|
||||
unknown -> boot [label="migrate"];
|
||||
unknown -> prolog_migrate_unknown [label="migrate"];
|
||||
prolog_migrate_unknown -> boot [style="dashed", color="blue"];
|
||||
|
||||
poweroff -> prolog_migrate_poweroff [label="migrate"];
|
||||
prolog_migrate_poweroff -> poweroff [style="dashed", color="blue"];
|
||||
@ -304,6 +307,9 @@ digraph OpenNebula {
|
||||
prolog_resume -> stopped [style="dotted", color="red"];
|
||||
prolog_undeploy -> undeployed [style="dotted", color="red"];
|
||||
|
||||
prolog_migrate_unknown -> prolog_migrate_unknown_failure [label=" ", style="dotted", color="red"];
|
||||
prolog_migrate_unknown_failure -> prolog_migrate_unknown [label="migrate"];
|
||||
|
||||
boot -> boot_failure [label=" ", style="dotted", color="red"];
|
||||
boot_migrate -> boot_migrate_failure [label=" ", style="dotted", color="red"];
|
||||
boot_poweroff -> poweroff [style="dotted", color="red"];
|
||||
|
@ -174,7 +174,9 @@
|
||||
DISK_SNAPSHOT_DELETE_SUSPENDED = 56,
|
||||
DISK_SNAPSHOT = 57,
|
||||
DISK_SNAPSHOT_REVERT = 58,
|
||||
DISK_SNAPSHOT_DELETE = 59
|
||||
DISK_SNAPSHOT_DELETE = 59,
|
||||
PROLOG_MIGRATE_UNKNOWN = 60,
|
||||
PROLOG_MIGRATE_UNKNOWN_FAILURE = 61
|
||||
-->
|
||||
<xs:element name="LCM_STATE" type="xs:integer"/>
|
||||
<xs:element name="PREV_STATE" type="xs:integer"/>
|
||||
|
@ -105,7 +105,9 @@
|
||||
DISK_SNAPSHOT_DELETE_SUSPENDED = 56,
|
||||
DISK_SNAPSHOT = 57,
|
||||
DISK_SNAPSHOT_REVERT = 58,
|
||||
DISK_SNAPSHOT_DELETE = 59
|
||||
DISK_SNAPSHOT_DELETE = 59,
|
||||
PROLOG_MIGRATE_UNKNOWN = 60,
|
||||
PROLOG_MIGRATE_UNKNOWN_FAILURE = 61
|
||||
-->
|
||||
<xs:element name="LCM_STATE" type="xs:integer"/>
|
||||
<xs:element name="PREV_STATE" type="xs:integer"/>
|
||||
|
@ -336,6 +336,7 @@ class OneVMHelper < OpenNebulaHelper::OneHelper
|
||||
:PROLOG_MIGRATE_FAILURE => :migrate,
|
||||
:PROLOG_MIGRATE_POWEROFF_FAILURE => :migrate,
|
||||
:PROLOG_MIGRATE_SUSPEND_FAILURE => :migrate,
|
||||
:PROLOG_MIGRATE_UNKNOWN_FAILURE => :migrate,
|
||||
:PROLOG_FAILURE => :prolog,
|
||||
:PROLOG_RESUME_FAILURE => :resume,
|
||||
:PROLOG_UNDEPLOY_FAILURE => :resume,
|
||||
|
@ -484,6 +484,7 @@ module OpenNebula
|
||||
lcm_state_str == 'EPILOG_UNDEPLOY_FAILURE' ||
|
||||
lcm_state_str == 'PROLOG_MIGRATE_POWEROFF_FAILURE' ||
|
||||
lcm_state_str == 'PROLOG_MIGRATE_SUSPEND_FAILURE' ||
|
||||
lcm_state_str == 'PROLOG_MIGRATE_UNKNOWN_FAILURE' ||
|
||||
lcm_state_str == 'BOOT_UNDEPLOY_FAILURE' ||
|
||||
lcm_state_str == 'BOOT_STOPPED_FAILURE' ||
|
||||
lcm_state_str == 'PROLOG_RESUME_FAILURE' ||
|
||||
|
@ -296,12 +296,12 @@ void LifeCycleManager::migrate_action(int vid)
|
||||
vm->get_lcm_state() == VirtualMachine::UNKNOWN)
|
||||
{
|
||||
//----------------------------------------------------
|
||||
// Bypass SAVE_MIGRATE & PROLOG_MIGRATE goto BOOT
|
||||
// Bypass SAVE_MIGRATE goto PROLOG_MIGRATE_UNKNOWN
|
||||
//----------------------------------------------------
|
||||
|
||||
vm->set_resched(false);
|
||||
|
||||
vm->set_state(VirtualMachine::BOOT);
|
||||
vm->set_state(VirtualMachine::PROLOG_MIGRATE_UNKNOWN);
|
||||
|
||||
vm->delete_snapshots();
|
||||
|
||||
@ -311,17 +311,7 @@ void LifeCycleManager::migrate_action(int vid)
|
||||
|
||||
vm->set_stime(the_time);
|
||||
|
||||
vm->set_previous_action(History::MIGRATE_ACTION);
|
||||
|
||||
vm->set_previous_etime(the_time);
|
||||
|
||||
vm->set_previous_vm_info();
|
||||
|
||||
vm->set_previous_running_etime(the_time);
|
||||
|
||||
vm->set_previous_reason(History::USER);
|
||||
|
||||
vmpool->update_previous_history(vm);
|
||||
vm->set_prolog_stime(the_time);
|
||||
|
||||
vmpool->update_history(vm);
|
||||
|
||||
@ -334,7 +324,7 @@ void LifeCycleManager::migrate_action(int vid)
|
||||
|
||||
//----------------------------------------------------
|
||||
|
||||
vmm->trigger(VirtualMachineManager::DEPLOY, vid);
|
||||
tm->trigger(TransferManager::PROLOG_MIGR,vid);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1072,6 +1062,8 @@ void LifeCycleManager::clean_up_vm(VirtualMachine * vm, bool dispose, int& imag
|
||||
case VirtualMachine::PROLOG_MIGRATE_POWEROFF_FAILURE:
|
||||
case VirtualMachine::PROLOG_MIGRATE_SUSPEND:
|
||||
case VirtualMachine::PROLOG_MIGRATE_SUSPEND_FAILURE:
|
||||
case VirtualMachine::PROLOG_MIGRATE_UNKNOWN:
|
||||
case VirtualMachine::PROLOG_MIGRATE_UNKNOWN_FAILURE:
|
||||
vm->set_prolog_etime(the_time);
|
||||
vmpool->update_history(vm);
|
||||
|
||||
@ -1388,6 +1380,14 @@ void LifeCycleManager::retry(VirtualMachine * vm)
|
||||
tm->trigger(TransferManager::PROLOG_MIGR, vid);
|
||||
break;
|
||||
|
||||
case VirtualMachine::PROLOG_MIGRATE_UNKNOWN_FAILURE:
|
||||
vm->set_state(VirtualMachine::PROLOG_MIGRATE_UNKNOWN);
|
||||
|
||||
vmpool->update(vm);
|
||||
|
||||
tm->trigger(TransferManager::PROLOG_MIGR, vid);
|
||||
break;
|
||||
|
||||
case VirtualMachine::PROLOG_RESUME_FAILURE:
|
||||
vm->set_state(VirtualMachine::PROLOG_RESUME);
|
||||
|
||||
@ -1479,6 +1479,7 @@ void LifeCycleManager::retry(VirtualMachine * vm)
|
||||
case VirtualMachine::PROLOG_MIGRATE:
|
||||
case VirtualMachine::PROLOG_MIGRATE_POWEROFF:
|
||||
case VirtualMachine::PROLOG_MIGRATE_SUSPEND:
|
||||
case VirtualMachine::PROLOG_MIGRATE_UNKNOWN:
|
||||
tm->trigger(TransferManager::PROLOG_MIGR,vid);
|
||||
break;
|
||||
|
||||
|
@ -639,6 +639,8 @@ void LifeCycleManager::prolog_success_action(int vid)
|
||||
case VirtualMachine::PROLOG_MIGRATE_FAILURE: //recover success
|
||||
case VirtualMachine::PROLOG:
|
||||
case VirtualMachine::PROLOG_FAILURE: //recover success
|
||||
case VirtualMachine::PROLOG_MIGRATE_UNKNOWN:
|
||||
case VirtualMachine::PROLOG_MIGRATE_UNKNOWN_FAILURE: //recover success
|
||||
switch (lcm_state)
|
||||
{
|
||||
case VirtualMachine::PROLOG_RESUME:
|
||||
@ -665,6 +667,12 @@ void LifeCycleManager::prolog_success_action(int vid)
|
||||
vm->set_state(VirtualMachine::BOOT);
|
||||
break;
|
||||
|
||||
case VirtualMachine::PROLOG_MIGRATE_UNKNOWN:
|
||||
case VirtualMachine::PROLOG_MIGRATE_UNKNOWN_FAILURE: //recover success
|
||||
action = VirtualMachineManager::DEPLOY;
|
||||
vm->set_state(VirtualMachine::BOOT);
|
||||
break;
|
||||
|
||||
default:
|
||||
return;
|
||||
}
|
||||
|
@ -139,7 +139,9 @@ public class VirtualMachine extends PoolElement{
|
||||
"DISK_SNAPSHOT_DELETE_SUSPENDED",
|
||||
"DISK_SNAPSHOT",
|
||||
"DISK_SNAPSHOT_REVERT",
|
||||
"DISK_SNAPSHOT_DELETE"
|
||||
"DISK_SNAPSHOT_DELETE",
|
||||
"PROLOG_MIGRATE_UNKNOWN",
|
||||
"PROLOG_MIGRATE_UNKNOWN_FAILURE"
|
||||
};
|
||||
|
||||
private static final String[] SHORT_LCM_STATES =
|
||||
@ -203,7 +205,9 @@ public class VirtualMachine extends PoolElement{
|
||||
"snap", // DISK_SNAPSHOT_DELETE_SUSPENDED
|
||||
"snap", // DISK_SNAPSHOT
|
||||
"snap", // DISK_SNAPSHOT_REVERT
|
||||
"snap" // DISK_SNAPSHOT_DELETE
|
||||
"snap", // DISK_SNAPSHOT_DELETE
|
||||
"migr", // PROLOG_MIGRATE_UNKNOWN
|
||||
"fail" // PROLOG_MIGRATE_UNKNOWN_FAILURE
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -113,6 +113,8 @@ module OpenNebula
|
||||
DISK_SNAPSHOT
|
||||
DISK_SNAPSHOT_REVERT
|
||||
DISK_SNAPSHOT_DELETE
|
||||
PROLOG_MIGRATE_UNKNOWN
|
||||
PROLOG_MIGRATE_UNKNOWN_FAILURE
|
||||
}
|
||||
|
||||
SHORT_VM_STATES={
|
||||
@ -187,7 +189,9 @@ module OpenNebula
|
||||
"DISK_SNAPSHOT_DELETE_SUSPENDED"=> "snap",
|
||||
"DISK_SNAPSHOT" => "snap",
|
||||
"DISK_SNAPSHOT_REVERT" => "snap",
|
||||
"DISK_SNAPSHOT_DELETE" => "snap"
|
||||
"DISK_SNAPSHOT_DELETE" => "snap",
|
||||
"PROLOG_MIGRATE_UNKNOWN" => "migr",
|
||||
"PROLOG_MIGRATE_UNKNOWN_FAILURE" => "fail"
|
||||
}
|
||||
|
||||
MIGRATE_REASON=%w{NONE ERROR USER}
|
||||
|
@ -91,7 +91,9 @@ VNC_STATES = [
|
||||
#56, #DISK_SNAPSHOT_DELETE_SUSPENDED
|
||||
#57, #DISK_SNAPSHOT
|
||||
#58, #DISK_SNAPSHOT_REVERT
|
||||
#59 #DISK_SNAPSHOT_DELETE
|
||||
#59, #DISK_SNAPSHOT_DELETE
|
||||
#60, #PROLOG_MIGRATE_UNKNOWN
|
||||
#61 #PROLOG_MIGRATE_UNKNOWN_FAILURE
|
||||
]
|
||||
|
||||
class OpenNebulaVNC
|
||||
|
@ -109,6 +109,8 @@ define(function(require) {
|
||||
"DISK_SNAPSHOT",
|
||||
"DISK_SNAPSHOT_REVERT",
|
||||
"DISK_SNAPSHOT_DELETE",
|
||||
"PROLOG_MIGRATE_UNKNOWN",
|
||||
"PROLOG_MIGRATE_UNKNOWN_FAILURE",
|
||||
];
|
||||
|
||||
var LCM_STATES = {
|
||||
@ -171,7 +173,9 @@ define(function(require) {
|
||||
DISK_SNAPSHOT_DELETE_SUSPENDED : 56,
|
||||
DISK_SNAPSHOT : 57,
|
||||
DISK_SNAPSHOT_REVERT : 58,
|
||||
DISK_SNAPSHOT_DELETE : 59
|
||||
DISK_SNAPSHOT_DELETE : 59,
|
||||
PROLOG_MIGRATE_UNKNOWN : 60,
|
||||
PROLOG_MIGRATE_UNKNOWN_FAILURE : 61
|
||||
};
|
||||
|
||||
var SHORT_LCM_STATES_STR = [
|
||||
@ -235,6 +239,8 @@ define(function(require) {
|
||||
Locale.tr("SNAPSHOT"), // DISK_SNAPSHOT
|
||||
Locale.tr("SNAPSHOT"), // DISK_SNAPSHOT_REVERT
|
||||
Locale.tr("SNAPSHOT"), // DISK_SNAPSHOT_DELETE
|
||||
Locale.tr("MIGRATE"), // PROLOG_MIGRATE_UNKNOWN
|
||||
Locale.tr("FAILURE"), // PROLOG_MIGRATE_UNKNOWN_FAILURE
|
||||
];
|
||||
|
||||
var VNC_STATES = [
|
||||
@ -543,6 +549,7 @@ define(function(require) {
|
||||
case LCM_STATES.BOOT_STOPPED_FAILURE:
|
||||
case LCM_STATES.PROLOG_RESUME_FAILURE:
|
||||
case LCM_STATES.PROLOG_UNDEPLOY_FAILURE:
|
||||
case LCM_STATES.PROLOG_MIGRATE_UNKNOWN_FAILURE:
|
||||
return true;
|
||||
|
||||
default:
|
||||
|
@ -1003,6 +1003,7 @@ define(function(require) {
|
||||
case OpenNebulaVM.LCM_STATES.PROLOG_MIGRATE:
|
||||
case OpenNebulaVM.LCM_STATES.PROLOG_MIGRATE_POWEROFF:
|
||||
case OpenNebulaVM.LCM_STATES.PROLOG_MIGRATE_SUSPEND:
|
||||
case OpenNebulaVM.LCM_STATES.PROLOG_MIGRATE_UNKNOWN:
|
||||
state_color = 'running';
|
||||
state_str = Locale.tr("RUNNING");
|
||||
break;
|
||||
@ -1030,6 +1031,7 @@ define(function(require) {
|
||||
case OpenNebulaVM.LCM_STATES.BOOT_STOPPED_FAILURE:
|
||||
case OpenNebulaVM.LCM_STATES.PROLOG_RESUME_FAILURE:
|
||||
case OpenNebulaVM.LCM_STATES.PROLOG_UNDEPLOY_FAILURE:
|
||||
case OpenNebulaVM.LCM_STATES.PROLOG_MIGRATE_UNKNOWN_FAILURE:
|
||||
state_color = 'error';
|
||||
state_str = Locale.tr("ERROR");
|
||||
break;
|
||||
|
@ -113,6 +113,8 @@ define(function(require) {
|
||||
LCM_STATE_ACTIONS[ OpenNebulaVM.LCM_STATES.DISK_SNAPSHOT ] = [];
|
||||
LCM_STATE_ACTIONS[ OpenNebulaVM.LCM_STATES.DISK_SNAPSHOT_REVERT ] = [];
|
||||
LCM_STATE_ACTIONS[ OpenNebulaVM.LCM_STATES.DISK_SNAPSHOT_DELETE ] = [];
|
||||
LCM_STATE_ACTIONS[ OpenNebulaVM.LCM_STATES.PROLOG_MIGRATE_UNKNOWN ] = [];
|
||||
LCM_STATE_ACTIONS[ OpenNebulaVM.LCM_STATES.PROLOG_MIGRATE_UNKNOWN_FAILURE ] = [];
|
||||
|
||||
return {
|
||||
'disableAllStateActions': disableAllStateActions,
|
||||
|
@ -123,6 +123,7 @@ void TransferManagerDriver::protocol(const string& message) const
|
||||
case VirtualMachine::PROLOG_UNDEPLOY:
|
||||
case VirtualMachine::PROLOG_MIGRATE_POWEROFF:
|
||||
case VirtualMachine::PROLOG_MIGRATE_SUSPEND:
|
||||
case VirtualMachine::PROLOG_MIGRATE_UNKNOWN:
|
||||
lcm_action = LifeCycleManager::PROLOG_SUCCESS;
|
||||
break;
|
||||
|
||||
@ -188,6 +189,7 @@ void TransferManagerDriver::protocol(const string& message) const
|
||||
case VirtualMachine::PROLOG_UNDEPLOY:
|
||||
case VirtualMachine::PROLOG_MIGRATE_POWEROFF:
|
||||
case VirtualMachine::PROLOG_MIGRATE_SUSPEND:
|
||||
case VirtualMachine::PROLOG_MIGRATE_UNKNOWN:
|
||||
lcm_action = LifeCycleManager::PROLOG_FAILURE;
|
||||
break;
|
||||
|
||||
|
@ -94,4 +94,21 @@ function disk_type
|
||||
DISK_TYPE="${XPATH_ELEMENTS[0]}"
|
||||
|
||||
echo $DISK_TYPE
|
||||
}
|
||||
}
|
||||
|
||||
# Print the LCM_STATE of a VM on stdout.
#
# Runs `onevm show -x $VMID`, pipes the XML into xpath.rb (--stdin) asking for
# /VM/LCM_STATE, and echoes the first element returned.
#
# Reads $VMID and $ONE_LOCAL_VAR, neither of which is set inside this function
# (presumably exported by the calling driver script -- verify against callers).
# Nothing is declared `local`, so XPATH, XPATH_ELEMENTS, i and LCM_STATE are
# all left set in the caller's shell after the call.
|
||||
function lcm_state
|
||||
{
|
||||
XPATH="${ONE_LOCAL_VAR}/remotes/datastore/xpath.rb --stdin"
|
||||
|
||||
# Reset the index and result array so values from an earlier call are not reused.
unset i XPATH_ELEMENTS
|
||||
|
||||
# `read -d ''` splits the xpath.rb output on NUL bytes, one array entry per element.
while IFS= read -r -d '' element; do
|
||||
XPATH_ELEMENTS[i++]="$element"
|
||||
done < <(onevm show -x $VMID| $XPATH \
|
||||
/VM/LCM_STATE )
|
||||
|
||||
LCM_STATE="${XPATH_ELEMENTS[0]}"
|
||||
|
||||
echo $LCM_STATE
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user