2011-07-11 20:42:22 +04:00
#!/usr/bin/env ruby
2011-01-17 17:27:10 +03:00
# -------------------------------------------------------------------------- #
2016-05-04 13:33:23 +03:00
# Copyright 2002-2016, OpenNebula Project, OpenNebula Systems #
2011-01-17 17:27:10 +03:00
# #
# Licensed under the Apache License, Version 2.0 (the "License"); you may #
# not use this file except in compliance with the License. You may obtain #
# a copy of the License at #
# #
# http://www.apache.org/licenses/LICENSE-2.0 #
# #
# Unless required by applicable law or agreed to in writing, software #
# distributed under the License is distributed on an "AS IS" BASIS, #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and #
# limitations under the License. #
#--------------------------------------------------------------------------- #
2014-08-24 14:05:01 +04:00
##############################################################################
2011-01-17 17:27:10 +03:00
# Script to implement host failure tolerance
2011-07-11 20:42:22 +04:00
# It can be set to
2014-08-24 14:05:01 +04:00
# -m migrate VMs to another host. Only for images in shared storage
# -r recreate VMs running in the host. State will be lost.
2011-07-11 20:42:22 +04:00
# -d delete VMs running in the host
2012-10-18 21:18:43 +04:00
# Additional flags
# -f force resubmission of suspended VMs
# -p <n> avoid resubmission if host comes
# back after n monitoring cycles
2014-08-24 14:05:01 +04:00
##############################################################################
##############################################################################
# WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!
#
2014-08-24 14:21:27 +04:00
# This script needs to fence the error host to prevent split brain VMs. You
2014-08-24 14:05:01 +04:00
# may use any fence mechanism and invoke it right before the loop over the
# host's VMs ("Loop through all vms"), using host_name
#
# WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!
#############################################################################
2011-01-17 17:27:10 +03:00
# Installation layout.
#
# When the ONE_LOCATION environment variable is set, library, VM and config
# paths are resolved below that directory; otherwise the system-wide paths
# are used.
ONE_LOCATION = ENV['ONE_LOCATION']

if ONE_LOCATION
    RUBY_LIB_LOCATION = ONE_LOCATION + '/lib/ruby'
    VMDIR             = ONE_LOCATION + '/var'
    CONFIG_FILE       = ONE_LOCATION + '/var/config'
else
    RUBY_LIB_LOCATION = '/usr/lib/one/ruby'
    VMDIR             = '/var/lib/one'
    CONFIG_FILE       = '/var/lib/one/config'
end

# Make the libraries under RUBY_LIB_LOCATION loadable by the requires below
$LOAD_PATH << RUBY_LIB_LOCATION
2013-05-29 12:42:15 +04:00
require 'opennebula'
2012-12-07 15:12:23 +04:00
include OpenNebula
2012-10-19 15:17:27 +04:00
require 'getoptlong'
2011-01-17 17:27:10 +03:00
# The ID of the failed host is mandatory and is taken from the first
# command-line argument. It is read before the options are parsed.
host_id = ARGV[0]

unless host_id
    # Tell the operator why the script bailed out instead of exiting silently
    $stderr.puts "Usage: #{$0} <host_id> [-m|-r|-d] [-f] [-p <n>]"
    exit(-1)
end
2013-03-20 17:46:07 +04:00
# ------------------------------------------------------------------------------
# Defaults; each one can be overridden from the command line
# ------------------------------------------------------------------------------
mode   = "-r" # By default, recreate VMs
force  = "n"  # By default, don't recreate/delete suspended VMs
repeat = nil  # By default, don't wait for monitorization cycles

opts = GetoptLong.new(
    ['--migrate',  '-m', GetoptLong::NO_ARGUMENT],
    ['--delete',   '-d', GetoptLong::NO_ARGUMENT],
    ['--recreate', '-r', GetoptLong::NO_ARGUMENT],
    ['--force',    '-f', GetoptLong::NO_ARGUMENT],
    ['--pause',    '-p', GetoptLong::REQUIRED_ARGUMENT]
)

begin
    opts.each do |opt, arg|
        case opt
        when '--migrate'
            mode = "-m"
        when '--delete'
            mode = "-d"
        when '--recreate'
            mode = "-r"
        when '--force'
            force = "y"
        when '--pause'
            repeat = arg.to_i
        end
    end
rescue StandardError
    # Option parsing failed: abort. StandardError (not Exception) so that
    # SystemExit and signals are never swallowed here.
    exit(-1)
end

# ------------------------------------------------------------------------------
# Connect to OpenNebula and retrieve the failed host
# ------------------------------------------------------------------------------
begin
    client = Client.new
rescue StandardError => e
    puts "Error: #{e}"
    exit(-1)
end

# Retrieve hostname
host = OpenNebula::Host.new_with_id(host_id, client)

rc = host.info
exit(-1) if OpenNebula.is_error?(rc)

host_name = host.name

# ------------------------------------------------------------------------------
# Optional grace period (-p <n>): wait n monitoring cycles and give up if the
# host has left the error state in the meantime
# ------------------------------------------------------------------------------
if repeat
    # Retrieve host monitor interval; the last matching line in CONFIG_FILE wins
    monitor_interval = nil

    File.readlines(CONFIG_FILE).each do |line|
        next unless line =~ /MONITORING_INTERVAL/

        monitor_interval = line.split("=").last.to_i
    end

    # Without an interval the wait cannot be computed; fail with a clear
    # message rather than with a TypeError from `repeat * nil`
    if monitor_interval.nil?
        $stderr.puts "Error: MONITORING_INTERVAL not found in #{CONFIG_FILE}"
        exit(-1)
    end

    # Sleep through the desired number of monitor intervals
    sleep(repeat * monitor_interval)

    # Refresh the host before looking at its state: the data loaded by the
    # first host.info call predates the sleep
    rc = host.info
    exit(-1) if OpenNebula.is_error?(rc)

    # If the host came back, exit! avoid duplicated VMs
    # NOTE(review): 3 is presumably the host ERROR state - confirm
    exit(0) if host.state != 3
end

# ------------------------------------------------------------------------------
# NOTE: fence host_name here, before touching its VMs (see the WARNING in the
# header of this file)
# ------------------------------------------------------------------------------

# ------------------------------------------------------------------------------
# Select the VMs whose history points to the failed host
# ------------------------------------------------------------------------------
vms = VirtualMachinePool.new(client)

rc = vms.info_all
exit(-1) if OpenNebula.is_error?(rc)

# VM states to act on; -f widens the selection
# NOTE(review): per the header, the extra states cover suspended VMs - confirm
# the exact meaning of 5 and 8
state = "STATE=3"
state += " or STATE=5 or STATE=8" if force == "y"

# NOTE(review): inside `and`, last() is coerced to a boolean, so it does not by
# itself restrict the match to the final HISTORY record - confirm the pool
# data only carries the latest record per VM
xpath = "/VM_POOL/VM[#{state}]/HISTORY_RECORDS/" \
        "HISTORY[HOSTNAME=\"#{host_name}\" and last()]/../../ID"

vm_ids_array = vms.retrieve_elements(xpath)

# ------------------------------------------------------------------------------
# Loop through all vms (retrieve_elements result is nil-guarded, as before)
# ------------------------------------------------------------------------------
(vm_ids_array || []).each do |vm_id|
    vm = OpenNebula::VirtualMachine.new_with_id(vm_id, client)
    vm.info

    rc = case mode
         when "-r" then vm.delete(true) # recreate
         when "-d" then vm.delete
         when "-m" then vm.resched
         end

    # Keep going with the remaining VMs, but do not hide the failure
    if OpenNebula.is_error?(rc)
        $stderr.puts "Error processing VM #{vm_id}: #{rc.message}"
    end
end