
F OpenNebula/one#6368: Improve SSH live migration action

This commit improves non-shared storage live migration:
- The migration process now uses suspend to allow for additional syncs,
  including snapshots defined in qcow2 disks.
- The script is now written in Ruby to implement more elaborate logic.
- Sync is done through rsync.

(cherry picked from commit bb9af9d029)
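The flow the message describes can be pictured with a minimal Ruby sketch. This is only an illustration of the sequence (migrate with --suspend, extra rsync pass, resume), not the driver code; the domain name, destination host and snapshot directory below are assumptions:

    require 'open3'

    deploy_id = 'one-42'   # hypothetical libvirt domain name
    dst_host  = 'node02'   # hypothetical destination host
    snap_dirs = ['/var/lib/one/datastores/0/42/disk.0.snap'] # assumed qcow2 snapshot dir

    # 1. Live migration; --suspend leaves the domain paused on the destination
    system('virsh', '--connect', 'qemu:///system', 'migrate', '--live', '--suspend',
           '--copy-storage-all', deploy_id,
           "qemu+ssh://#{dst_host}/system") or abort 'migration failed'

    # 2. Additional sync pass: rsync snapshot data wiped out by the storage copy
    snap_dirs.each do |dir|
        _out, err, rc = Open3.capture3('rsync', '-az', dir, "#{dst_host}:#{File.dirname(dir)}/")
        abort "rsync failed: #{err}" unless rc.success?
    end

    # 3. Resume the paused domain on the destination libvirtd
    system('virsh', '--connect', "qemu+ssh://#{dst_host}/system", 'resume', deploy_id)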
Ruben S. Montero 2024-03-27 14:46:16 +01:00
parent 0837776d1f
commit 576a2c74a7
No known key found for this signature in database
GPG Key ID: A0CEA6FA880A1D87
3 changed files with 600 additions and 282 deletions


@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env ruby
 # -------------------------------------------------------------------------- #
 # Copyright 2002-2023, OpenNebula Project, OpenNebula Systems #
@@ -15,296 +15,248 @@
 # See the License for the specific language governing permissions and #
 # limitations under the License. #
 #--------------------------------------------------------------------------- #
-DRIVER_PATH=$(dirname $0)
-
-source "$DRIVER_PATH/../../etc/vmm/kvm/kvmrc"
-source "$DRIVER_PATH/../../scripts_common.sh"
-
-XPATH="$DRIVER_PATH/../../datastore/xpath.rb"
-
-get_qemu_img_version() {
-    qemu-img --version | head -1 | awk '{print $3}' | \
-        sed -e 's/[^0-9\.]//' | awk -F. '{ printf("%d%03d%03d\n", $1,$2,$3); }'
-}
-
-is_readonly() {
-    local DOMAIN=$1
-    local DISK=$2
-
-    READ_ONLY=$(virsh --connect $LIBVIRT_URI dumpxml $DOMAIN | \
-        $XPATH --stdin --subtree \
-        "//domain/devices/disk[source/@file='$DISK']/readonly")
-
-    [[ "$READ_ONLY" =~ '<readonly/>' ]]
-}
-
-get_size_and_format_of_disk_img() {
-    local QEMU_IMG_PATH="$1"
-    local PARAM="$2"
-
-    if [[ "$QEMU_IMG_PATH" =~ disk.[0-9]*.snap/ ]]; then
-        # Disk lives in a snap dir, it is a link (includes replica)
-        echo unknown qcow2-symlink
-        return
-    elif [ -L "$QEMU_IMG_PATH" ]; then
-        # Disk is a symlink, assume network-disk
-        echo unknown network-disk
-        return
-    fi
-
-    IMG_INFO=$(qemu-img info $PARAM "$QEMU_IMG_PATH" --output json)
-
-    if [ -z "$IMG_INFO" ]; then
-        echo "Failed to get image info for $QEMU_IMG_PATH"
-        exit 1
-    fi
-
-    SIZE=$(echo $IMG_INFO | sed -nE 's/^.*virtual-size.: ([0-9]+).*/\1/p')
-    FORMAT=$(echo $IMG_INFO | sed -nE 's/^.*format.: "([a-z0-9]+)".*/\1/p')
-
-    if [ -z "$SIZE" ] || [ -z "$FORMAT" ]; then
-        echo "Failed to get image $QEMU_IMG_PATH size or format"
-        exit 1
-    fi
-
-    echo $SIZE $FORMAT
-}
-
-create_target_disk_img() {
-    local DEST_HOST=$1
-    local QEMU_IMG_PATH="$2"
-    local SIZE="$3"
-    local DISK_DIR="$(dirname $QEMU_IMG_PATH)"
-
-    ssh_monitor_and_log "$DEST_HOST" \
-        "mkdir -p '$DISK_DIR' && qemu-img create -f qcow2 '$QEMU_IMG_PATH' '$SIZE'" \
-        "Failed to create new qcow image for $QEMU_IMG_PATH"
-}
-
-STDIN=$(cat -)
-DEPLOY_ID=$1
-DEST_HOST=$2
-
-DISKS=$(virsh --connect $LIBVIRT_URI domblklist "$DEPLOY_ID" \
-    | tail -n+3 | grep -v "^$" | awk '{print $1 "," $2}')
-
-unset i j XPATH_ELEMENTS
-
-while IFS= read -r -d '' element; do
-    XPATH_ELEMENTS[i++]="$element"
-done < <(echo $STDIN | $XPATH \
-    /VMM_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/SHARED \
-    /VMM_DRIVER_ACTION_DATA/DISK_TARGET_PATH)
-
-SHARED="${XPATH_ELEMENTS[j++]}"
-VM_DIR="${XPATH_ELEMENTS[j++]}"
-
-# use "force-share" param for qemu >= 2.10
-[ "$(get_qemu_img_version)" -ge 2010000 ] && QEMU_IMG_PARAM="-U"
-
-# migration can't be done with domain snapshots, drop them first but save the
-# current snapshot for redefine
-SNAP_CUR=$(virsh --connect $LIBVIRT_URI snapshot-current --name $DEPLOY_ID 2>/dev/null)
-
-SNAPS=$(monitor_and_log \
-    "virsh --connect $LIBVIRT_URI snapshot-list $DEPLOY_ID --name 2>/dev/null" \
-    "Failed to get snapshots for $DEPLOY_ID")
-
-for SNAP in $SNAPS; do
-    exec_and_log \
-        "virsh --connect $LIBVIRT_URI snapshot-delete $DEPLOY_ID --snapshotname $SNAP --metadata" \
-        "Failed to delete snapshot $SNAP from $DEPLOY_ID"
-done
-
-# Compact memory
-if [ "x$CLEANUP_MEMORY_ON_START" = "xyes" ]; then
-    ssh_exec_and_log "$DEST_HOST" "(sudo -l | grep -q sysctl) && sudo -n sysctl vm.drop_caches=3 vm.compact_memory=1 >/dev/null || true" \
-        "Failed to compact memory on $DEST_HOST"
-fi
-
-if [ "$SHARED" = "YES" ]; then
-    retry_if_no_error "active block job" 3 5 virsh --connect $LIBVIRT_URI migrate \
-        --live $MIGRATE_OPTIONS $DEPLOY_ID $QEMU_PROTOCOL://$DEST_HOST/system
-    RC=$?
-else
-    if [[ -z "$DISKS" ]]; then
-        error_message "No disks discovered on the VM"
-        exit 1
-    fi
-
-    ssh_monitor_and_log "$DEST_HOST" "mkdir -p '$VM_DIR'" \
-        "Failed to make remote directory $VM_DIR"
-
-    MIGRATE_DISKS=""
-
-    for DISK_STR in $DISKS; do
-        DISK_DEV=${DISK_STR/,*/}
-        DISK_PATH=${DISK_STR/*,/}
-
-        read -r SIZE FORMAT <<<"$(get_size_and_format_of_disk_img "$DISK_PATH" "$QEMU_IMG_PARAM")"
-
-        if [ "$FORMAT" = "raw" ]; then
-            if ! is_readonly $DEPLOY_ID $DISK_PATH; then
-                RAW_DISKS+=" $DISK_PATH"
-                MIGRATE_DISKS+="${MIGRATE_DISKS:+,}${DISK_DEV}"
-            fi
-
-            # do initial rsync
-            multiline_exec_and_log "$TAR -cSf - $DISK_PATH | $SSH $DEST_HOST '$TAR -xSf - -C / '" \
-                "Failed to rsync disk $DISK_PATH to $DEST_HOST:$DISK_PATH"
-        elif [ "$FORMAT" = "qcow2" ]; then
-            create_target_disk_img "$DEST_HOST" "$DISK_PATH" "$SIZE"
-            MIGRATE_DISKS+="${MIGRATE_DISKS:+,}${DISK_DEV}"
-        elif [ "$FORMAT" = "qcow2-symlink" ]; then
-            # don't create the disk, the .snap dir will be copied anyway
-            MIGRATE_DISKS+="${MIGRATE_DISKS:+,}${DISK_DEV}"
-        elif [ "$FORMAT" = "network-disk" ]; then
-            true # skip
-        fi
-
-        # copy disk snapshots
-        if [[ "$DISK_PATH" =~ (disk\.[0-9]*\.snap)/. ]]; then
-            SNAP_DIR="$VM_DIR/${BASH_REMATCH[1]}"
-        elif [ -d "${DISK_PATH}.snap" ]; then # it should be included in the clause above
-            SNAP_DIR="${DISK_PATH}.snap"
-        fi
-
-        if [ -n "${SNAP_DIR}" ]; then
-            multiline_exec_and_log "$TAR -cSf - ${SNAP_DIR} | $SSH $DEST_HOST '$TAR -xSf - -C / '" \
-                "Failed to rsync disk snapshot ${SNAP_DIR} to $DEST_HOST"
-        fi
-
-        # recreate symlinks
-        if [ -L "$DISK_PATH" ]; then
-            TARGET=$(readlink $DISK_PATH)
-            LNAME=$DISK_PATH
-        elif [[ "$DISK_PATH" =~ (disk\.([0-9]*)\.snap/.*) ]]; then
-            TARGET=${BASH_REMATCH[1]}
-            LNAME="disk.${BASH_REMATCH[2]}"
-        else
-            continue
-        fi
-
-        ssh_exec_and_log "$DEST_HOST" "cd ${VM_DIR}; [ -L \"$LNAME\" ] || ln -s \"$TARGET\" \"$LNAME\"" \
-            "Failed to create symlink $TARGET -> $LNAME on $DEST_HOST"
-    done
-
-    # copy vm.xml and ds.xml from the $VM_DIR
-    if ls $VM_DIR/*.xml > /dev/null; then
-        multiline_exec_and_log "$TAR -cSf - $VM_DIR/*.xml | $SSH $DEST_HOST '$TAR -xSf - -C / '" \
-            "Failed to copy xml files to $DEST_HOST"
-    fi
-
-    # freeze/suspend domain and rsync raw disks again
-    if [ -n "$RAW_DISKS" ]; then
-        if timeout ${VIRSH_TIMEOUT:-60} virsh --connect $LIBVIRT_URI domfsfreeze $DEPLOY_ID; then
-            # local domfsthaw for the case migration fails
-            trap "virsh --connect $LIBVIRT_URI domfsthaw $DEPLOY_ID" EXIT
-            FREEZE="yes"
-        else
-            if virsh --connect $LIBVIRT_URI suspend $DEPLOY_ID; then
-                # local resume for the case migration fails
-                trap "virsh --connect $LIBVIRT_URI resume $DEPLOY_ID" EXIT
-                SUSPEND="yes"
-            else
-                error_message "Could not freeze or suspend the domain"
-                exit 1
-            fi
-        fi
-
-        for DISK in $RAW_DISKS; do
-            multiline_exec_and_log "$TAR -cSf - $DISK | $SSH $DEST_HOST '$TAR -xSf - -C / '" \
-                "Failed to rsync disk $DISK to $DEST_HOST:$DISK"
-        done
-    fi
-
-    # Enumerate disks to copy
-    if [ -n "$MIGRATE_DISKS" ]; then
-        DISK_OPTS="--copy-storage-all --migrate-disks ${MIGRATE_DISKS}"
-    fi
-
-    retry_if_no_error "active block job" 3 5 \
-        virsh --connect $LIBVIRT_URI migrate \
-        --live $MIGRATE_OPTIONS $DEPLOY_ID $QEMU_PROTOCOL://$DEST_HOST/system \
-        $DISK_OPTS
-    RC=$?
-
-    # remote domfsthaw/resume, give it time
-    if [ $RC -eq 0 ]; then
-        if [ "$FREEZE" = "yes" ]; then
-            for I in $(seq 5); do
-                virsh --connect $QEMU_PROTOCOL://$DEST_HOST/system domfsthaw $DEPLOY_ID \
-                    && break
-                sleep 2
-            done
-        elif [ "$SUSPEND" = "yes" ]; then
-            for I in $(seq 5); do
-                virsh --connect $QEMU_PROTOCOL://$DEST_HOST/system resume $DEPLOY_ID \
-                    && break
-                sleep 2
-            done
-        fi
-    fi
-fi
-
-# cleanup target host in case of error
-if [ $RC -ne 0 ]; then
-    for CLEAN_OP in destroy undefine; do
-        virsh --connect $QEMU_PROTOCOL://$DEST_HOST/system "${CLEAN_OP}" $DEPLOY_ID >/dev/null 2>&1
-    done
-
-    if [ "$SHARED" != "YES" ]; then
-        ssh $DEST_HOST "rm -rf $VM_DIR"
-    fi
-
-    error_message "Could not migrate $DEPLOY_ID to $DEST_HOST"
-    exit $RC
-fi
-
-# redefine potential snapshots after live migration
-if [ "$SHARED" = "YES" ]; then
-    UUID=$(virsh --connect $QEMU_PROTOCOL://$DEST_HOST/system dominfo $DEPLOY_ID | awk '/UUID:/ {print $2}')
-    DISK_PATH=$(virsh --connect $QEMU_PROTOCOL://$DEST_HOST/system domblklist $DEPLOY_ID | awk '/disk.0/ {print $2}')
-    DISK_DIR=$(dirname $DISK_PATH)
-
-    if [[ "${DISK_DIR}" =~ .*/disk\.[0-9]+.snap$ ]]; then
-        DISK_DIR=$(dirname $DISK_DIR)
-    fi
-
-    for SNAPSHOT_MD_XML in $(ls -v ${DISK_DIR}/snap-*.xml 2>/dev/null); do
-        # replace uuid in the snapshot metadata xml
-        sed -i "s%<uuid>[[:alnum:]-]*</uuid>%<uuid>$UUID</uuid>%" $SNAPSHOT_MD_XML
-
-        # redefine the snapshot using the xml metadata file
-        virsh --connect $QEMU_PROTOCOL://$DEST_HOST/system snapshot-create $DEPLOY_ID $SNAPSHOT_MD_XML --redefine > /dev/null || true
-    done
-
-    [ -n "$SNAP_CUR" ] && virsh --connect $QEMU_PROTOCOL://$DEST_HOST/system snapshot-current $DEPLOY_ID $SNAP_CUR
-fi
-
-# Synchronize VM time on background on remote host
-if [ "$SYNC_TIME" = "yes" ]; then
-    SYNC_TIME_CMD=$(cat <<EOF
-(
-    for I in \$(seq 4 -1 1); do
-        if virsh --connect $LIBVIRT_URI --readonly dominfo $DEPLOY_ID; then
-            virsh --connect $LIBVIRT_URI domtime --sync $DEPLOY_ID && exit
-            [ "\$I" -gt 1 ] && sleep 5
-        else
-            exit
-        fi
-    done
-) &>/dev/null &
-EOF
-)
-    ssh_exec_and_log_no_error "${DEST_HOST}" \
-        "${SYNC_TIME_CMD}" \
-        "Failed to synchronize VM time"
-fi
-
-# Compact memory
-if [ "x$CLEANUP_MEMORY_ON_STOP" = "xyes" ]; then
-    (sudo -l | grep -q sysctl) && sudo -n sysctl vm.drop_caches=3 vm.compact_memory=1 &>/dev/null &
-fi
+ONE_LOCATION = ENV['ONE_LOCATION']
+
+if !ONE_LOCATION
+    RUBY_LIB_LOCATION = '/usr/lib/one/ruby'
+    GEMS_LOCATION     = '/usr/share/one/gems'
+    VMDIR             = '/var/lib/one'
+    CONFIG_FILE       = '/var/lib/one/config'
+else
+    RUBY_LIB_LOCATION = ONE_LOCATION + '/lib/ruby'
+    GEMS_LOCATION     = ONE_LOCATION + '/share/gems'
+    VMDIR             = ONE_LOCATION + '/var'
+    CONFIG_FILE       = ONE_LOCATION + '/var/config'
+end
+
+# %%RUBYGEMS_SETUP_BEGIN%%
+if File.directory?(GEMS_LOCATION)
+    real_gems_path = File.realpath(GEMS_LOCATION)
+    if !defined?(Gem) || Gem.path != [real_gems_path]
+        $LOAD_PATH.reject! {|l| l =~ /vendor_ruby/ }
+
+        # Suppress warnings from Rubygems
+        # https://github.com/OpenNebula/one/issues/5379
+        begin
+            verb = $VERBOSE
+            $VERBOSE = nil
+            require 'rubygems'
+            Gem.use_paths(real_gems_path)
+        ensure
+            $VERBOSE = verb
+        end
+    end
+end
+# %%RUBYGEMS_SETUP_END%%
+
+$LOAD_PATH << RUBY_LIB_LOCATION
+
+require 'pathname'
+
+require_relative 'opennebula_vm'
+require_relative '../lib/command'
+require_relative '../lib/xmlparser'
+
+include VirtualMachineManagerKVM
+include Command
+
+load_remote_env
+
+# ------------------------------------------------------------------------------
+# HELPER FUNCTIONS
+# Note: input parameters are defined as instance variables
+#   - @deploy_id
+#   - @dst_host
+#   - @vm_dir
+#   - @shared
+# ------------------------------------------------------------------------------
+
+# Sync paths to the destination VM folder
+#
+# @param paths [Array/String] Array of paths to sync
+#
+def rsync_paths(paths, raise_error = true)
+    return if paths.empty?
+
+    opts  = '-az'
+    paths = paths.join(' ') if paths.class == Array
+    dpath = "#{@dst_host}:#{@vm_dir}/"
+
+    tini = Time.now
+
+    rc, _o, e = Command.execute_log("rsync #{opts} #{paths} #{dpath}")
+
+    STDERR.puts "rsync time #{Time.now-tini}s"
+
+    raise StandardError, "Cannot rsync files: #{e}" if rc != 0 && raise_error
+end
+
+# In case of error this function is used to remove migration leftovers. For
+# non-shared storage configurations it will remove the destination VM folder
+#
+# @param kvm_vm [KvmDomain] libvirt domain class
+# @param error [String] message for the StandardError raised by the function
+#
+def cleanup_host(kvm_vm, error)
+    kvm_vm.destroy @dst_host
+    kvm_vm.undefine @dst_host
+
+    if !@shared
+        Command.ssh(:host => @dst_host, :cmds => "rm -rf #{@vm_dir}")
+    end
+
+    raise StandardError, error
+end
+
+# Migrate VMs running on local storage, using the copy-storage feature of
+# libvirt. Disks are scanned and classified as:
+#   - Regular disks are copied during the migration (devs array). A placeholder
+#     needs to be created in the destination host
+#
+#   - Readonly disks are copied before starting the migration (pre_sync array)
+#
+#   - Snapshots and other ancillary files are also copied in the pre_sync phase
+#
+#   - Network disks are assumed to be shared and are not copied
+#
+#   - qcow2 disks with system snapshots are rsynced after the migration to
+#     transfer the snapshots (wiped out during the migration)
+#
+# To allow this post sync phase the VM is paused after the migration (--suspend)
+#
+# @param kvm_vm [KvmDomain] libvirt domain class
+def local_migration(kvm_vm)
+    devs      = []
+    pre_sync  = ["#{@vm_dir}/*.xml"]
+    post_sync = []
+
+    kvm_vm.disks.each do |disk|
+        dev  = disk[0]
+        path = disk[1]
+
+        if !File.symlink? path # qcow2 & raw disks, regular files
+            qimg = QemuImg.new path
+
+            format = qimg['format']
+            size   = qimg['virtual-size']
+            snaps  = qimg['snapshots'] && !qimg['snapshots'].empty?
+
+            if format == 'raw' && kvm_vm.readonly?(path)
+                pre_sync << path
+            else
+                devs << dev
+                post_sync << path if format == 'qcow2' && snaps
+
+                cmds = <<~EOS
+                    mkdir -p #{File.dirname(path)}
+                    qemu-img create -f #{format} #{path} #{size}
+                EOS
+
+                Command.ssh(:host => @dst_host, :cmds => cmds,
+                            :emsg => 'Cannot create disk')
+            end
+        elsif path.match(/disk.[0-9]*.snap\//) # qcow2-symlink, replica
+            devs << dev
+        # else
+        #     network-disk, symlinks are assumed to be network disks
+        end
+
+        # Add disk snapshots dir to the list of paths to sync
+        if File.directory? "#{path}.snap"
+            pre_sync << Pathname.new("#{path}.snap").cleanpath
+        elsif (m = path.match(/(disk.[0-9]*.snap)\//)) # replica
+            pre_sync << Pathname.new("#{@vm_dir}/#{m[1]}").cleanpath
+        end
+
+        # recreate disk symlinks
+        if File.symlink? path
+            target = File.readlink(path)
+            lname  = path
+        elsif (m = path.match(/(disk.([0-9]*).snap)\/.*/)) # replica
+            target = m[1]
+            lname  = "disk.#{m[2]}"
+        else
+            next
+        end
+
+        cmds = <<~EOS
+            cd #{@vm_dir}
+            [ -L "#{lname}" ] || ln -s "#{target}" "#{lname}"
+        EOS
+
+        Command.ssh(:host => @dst_host, :cmds => cmds,
+                    :emsg => 'Cannot symlink disk')
+    end
+
+    rsync_paths(pre_sync)
+
+    rc, _out, err = kvm_vm.live_migrate_disks(@dst_host, devs)
+
+    cleanup_host(kvm_vm, err) if rc != 0
+
+    rsync_paths(post_sync)
+
+    kvm_vm.resume(@dst_host)
+end
+
+# ------------------------------------------------------------------------------
+# ------------------------------------------------------------------------------
+begin
+    @deploy_id = ARGV[0]
+    @dst_host  = ARGV[1]
+
+    action_xml = XMLElement.new_s(STDIN.read)
+
+    @vm_dir = Pathname.new(action_xml['/VMM_DRIVER_ACTION_DATA/DISK_TARGET_PATH']).cleanpath
+    @shared = action_xml['/VMM_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/SHARED'].casecmp('YES') == 0
+
+    kvm_vm = KvmDomain.new(@deploy_id)
+
+    # Migration can't be done with domain snapshots, drop them first
+    kvm_vm.snapshots_delete
+
+    # Migrate VMs using shared/local storage
+    if @shared
+        rc, _out, err = kvm_vm.live_migrate(@dst_host)
+
+        cleanup_host(kvm_vm, err) if rc != 0
+    else
+        local_migration(kvm_vm)
+    end
+
+    # Redefine system snapshots on the destination libvirtd
+    kvm_vm.snapshots_redefine(@dst_host, @vm_dir)
+
+    # Sync guest time
+    if ENV['SYNC_TIME'].to_s.upcase == 'YES'
+        cmds = <<~EOS
+            (
+                for I in $(seq 4 -1 1); do
+                    if #{virsh} --readonly dominfo #{@deploy_id}; then
+                        #{virsh} domtime --sync #{@deploy_id} && exit
+                        [ "\$I" -gt 1 ] && sleep 5
+                    else
+                        exit
+                    fi
+                done
+            ) &>/dev/null &
+        EOS
+
+        rc, _o, e = Command.ssh(:host => @dst_host, :cmds => cmds, :emsg => '')
+
+        STDERR.puts "Failed to synchronize VM time: #{e}" if rc != 0
+    end
+
+    # Compact memory
+    if ENV['CLEANUP_MEMORY_ON_STOP'].to_s.upcase == 'YES'
+        `(sudo -l | grep -q sysctl) && sudo -n sysctl vm.drop_caches=3 vm.compact_memory=1 &>/dev/null &`
+    end
+rescue StandardError => e
+    STDERR.puts "Error migrating VM #{@deploy_id} to host #{@dst_host}: #{e.message}"
+    STDERR.puts e.backtrace.join("\n")
+    exit(1)
+end
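The disk classification that local_migration documents can be reproduced standalone. A minimal sketch, assuming the disk path and its readonly flag are already known (the real script obtains them from virsh domblklist and the domain XML):

    require 'json'
    require 'open3'

    # Returns :pre_sync, :copy or :skip for a disk path, mirroring the rules
    # above: readonly raw disks are pre-synced, regular files get a placeholder
    # and are copied by libvirt, symlinks outside a .snap dir are assumed to be
    # network disks.
    def classify(path, readonly)
        if File.symlink?(path)
            return :copy if path.match(%r{disk\.[0-9]*\.snap/}) # replica-style symlink
            return :skip                                        # assumed network disk
        end

        out, _err, st = Open3.capture3('qemu-img', 'info', '--output', 'json', path)
        raise "qemu-img info failed for #{path}" unless st.success?

        info = JSON.parse(out)
        return :pre_sync if info['format'] == 'raw' && readonly

        :copy
    end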


@@ -29,14 +29,28 @@ module Command
         begin
             fd = lock if block
-            STDERR.puts "Running command #{cmd}" if verbose >= 1

             stdout, stderr, s = Open3.capture3(cmd, opts)
         ensure
             unlock(fd) if block
         end

-        STDERR.puts "#{stdout}\n#{stderr}" if verbose == 2
-        return [s.exitstatus, stdout, stderr] if verbose <= 0
+        stdin = if opts[:stdin_data]
+                    opts[:stdin_data].lines.map { |l| "[stdin]: #{l}" }.join
+                else
+                    ''
+                end
+
+        if s.exitstatus == 0
+            STDERR.puts cmd
+            STDERR.puts "#{stdin}" unless stdin.empty?
+        else
+            STDERR.puts "Error executing: #{cmd}"
+            STDERR.puts "#{stdin}" unless stdin.empty?
+            STDERR.puts "\t[stdout]: #{stdout}" unless stdout.empty?
+            STDERR.puts "\t[stderr]: #{stderr}" unless stderr.empty?
+        end
+
         [s.exitstatus, stdout, stderr]
     end
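A hypothetical call showing what the rewritten execute logs. With verbose set to 1 and a failing command, stderr would carry lines similar to the tagged format below (the path is illustrative):

    rc, _out, _err = Command.execute('qemu-img info /tmp/missing.qcow2', false, 1)
    # Error executing: qemu-img info /tmp/missing.qcow2
    #     [stderr]: qemu-img: Could not open '/tmp/missing.qcow2': ...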
@@ -47,20 +61,14 @@ module Command
     # Returns true/false if status is 0/!=0 and logs error if needed
     def self.execute_rc_log(cmd, lock = false)
-        rc, _stdout, stderr = execute(cmd, lock, 1)
-        STDERR.puts stderr unless rc.zero?
-        rc.zero?
+        rc = execute(cmd, lock, 1)
+        rc[0] == 0
     end

     # Execute cmd and logs error if needed
     def self.execute_log(cmd, lock = false)
-        rc = execute(cmd, lock, 1)
-        STDERR.puts rc[2] unless rc[0].zero?
-        rc
+        execute(cmd, lock, 1)
     end

     def self.execute_detach(cmd)
def self.execute_detach(cmd)
@@ -87,4 +95,35 @@ module Command
         lfd.close
     end
+
+    def self.ssh(options = {})
+        opt = {
+            :cmds     => '',
+            :host     => '',
+            :forward  => false,
+            :nostdout => false,
+            :nostderr => false,
+            :verbose  => 1,
+            :block    => false,
+            :emsg     => ''
+        }.merge!(options)
+
+        script = <<~EOS
+            export LANG=C
+            export LC_ALL=C
+            #{opt[:cmds]}
+        EOS
+
+        cmd = 'ssh -o ControlMaster=no -o ControlPath=none'
+        cmd << ' -o ForwardAgent=yes' if opt[:forward]
+        cmd << " #{opt[:host]} bash -s "
+        cmd << ' 1>/dev/null' if opt[:nostdout]
+        cmd << ' 2>/dev/null' if opt[:nostderr]
+
+        r, o, e = execute(cmd, opt[:block], opt[:verbose], :stdin_data => script)
+
+        return [r, o, e] if r == 0 || opt[:emsg].empty?
+
+        raise StandardError, "#{opt[:emsg]}: #{e}"
+    end
 end
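A usage sketch for the new Command.ssh helper (host name, directory and message are assumptions). With a non-empty :emsg a failure raises StandardError; with the default empty :emsg the status, stdout and stderr are returned instead:

    # Raises 'Cannot create VM directory: ...' if mkdir fails on the remote host
    Command.ssh(:host => 'node02',
                :cmds => 'mkdir -p /var/lib/one/datastores/0/42',
                :emsg => 'Cannot create VM directory')

    # No :emsg, so the caller inspects the return value
    rc, _out, err = Command.ssh(:host => 'node02', :cmds => 'exit 1')
    STDERR.puts err if rc != 0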


@@ -14,8 +14,11 @@
 # limitations under the License. #
 #--------------------------------------------------------------------------- #

 require_relative '../lib/xmlparser'
+require_relative '../lib/command'
 require_relative '../lib/opennebula_vm'
+
+require 'json'

 # rubocop:disable Style/ClassAndModuleChildren
 # rubocop:disable Style/ClassVars
@@ -51,7 +54,14 @@ module VirtualMachineManagerKVM
             next unless m

-            ENV[m[2]] = m[3].delete("\n") if m[2] && m[3]
+            k, v = m[2], m[3].strip
+
+            # remove single or double quotes
+            if !v.empty? && v[0] == v[-1] && ["'", '"'].include?(v[0])
+                v = v.slice(1, v.length-2)
+            end
+
+            ENV[k] = v.delete("\n") if k && v
         end
     rescue StandardError
     end
@@ -64,6 +74,323 @@ module VirtualMachineManagerKVM
         "virsh --connect #{uri}"
     end

+    # ------------------------------------------------------------------------
+    # This class abstracts the information and several methods to operate over
+    # qcow2 disk image files
+    # ------------------------------------------------------------------------
+    class QemuImg
+        attr_reader :path
+
+        def initialize(path)
+            @_info = nil
+            @path  = path
+        end
+
+        # @return [Array] with major, minor and micro qemu-img version numbers
+        def self.version
+            out, _err, _rc = Open3.capture3("qemu-img --version")
+            m = out.lines.first.match(/([0-9]+)\.([0-9]+)\.([0-9]+)/)
+
+            return ['0', '0', '0'] unless m
+
+            [m[1], m[2], m[3]]
+        end
+
+        # qemu-img command methods
+        # @param args[String] non option arguments
+        # @param opts[Hash] option arguments:
+        #   - keys are options as symbols, e.g.
+        #       :o '-o'
+        #       :disks '--disks'
+        #       :disk_only '--disk-only'
+        #       :map= '--map='
+        #   - values are the option values, can be empty
+        QEMU_IMG_COMMANDS = [
+            'convert',
+            'create',
+            'rebase',
+            'info',
+            'bitmap',
+            'commit'
+        ]
+
+        QEMU_IMG_COMMANDS.each do |command|
+            define_method(command.to_sym) do |args = '', opts|
+                cmd_str = "qemu-img #{command}"
+
+                opts.each do |key, value|
+                    next if key == :stdin_data
+
+                    if key.length == 1
+                        cmd_str << " -#{key}"
+                    else
+                        cmd_str << " --#{key.to_s.gsub('_', '-')}"
+                    end
+
+                    if value && !value.empty?
+                        cmd_str << ' ' if key[-1] != '='
+                        cmd_str << value.to_s
+                    end
+                end
+
+                out, err, rc = Open3.capture3("#{cmd_str} #{@path} #{args}",
+                                              :stdin_data => opts[:stdin_data])
+
+                if rc.exitstatus != 0
+                    msg = "Error executing: #{cmd_str} #{@path} #{args}\n"
+                    msg << "\t[stderr]: #{err}" unless err.empty?
+                    msg << "\t[stdout]: #{out}" unless out.empty?
+
+                    raise StandardError, msg
+                end
+
+                out
+            end
+        end
+
+        # Access image attribute
+        def [](key)
+            if !@_info
+                out    = info(:output => 'json', :force_share => '')
+                @_info = JSON.parse(out)
+            end
+
+            @_info[key]
+        end
+    end
+
+    #
+    # This class provides abstractions to access KVM/Qemu libvirt domains
+    #
+    class KvmDomain
+        def initialize(domain)
+            @domain = domain
+            @xml    = nil
+
+            out, err, rc = Open3.capture3("#{virsh} dumpxml #{@domain}")
+
+            if out.nil? || out.empty? || rc.exitstatus != 0
+                raise StandardError, "Error getting domain info #{err}"
+            end
+
+            @xml = XMLElement.new_s out
+
+            @snapshots = []
+            @snapshots_current = nil
+
+            @disks = nil
+        end
+
+        def [](xpath)
+            @xml[xpath]
+        end
+
+        def exist?(xpath)
+            @xml.exist?(xpath)
+        end
+
+        # ---------------------------------------------------------------------
+        # VM system snapshots interface
+        # ---------------------------------------------------------------------
+
+        # Get the system snapshots of a domain
+        #
+        # @param query[Boolean] refresh the snapshot information by querying
+        #   the libvirtd daemon
+        #
+        # @return [Array] the array of snapshot names, and the current snapshot
+        #   (if any)
+        def snapshots(query = true)
+            return [@snapshots, @snapshots_current] unless query
+
+            o, e, rc = Open3.capture3("#{virsh} snapshot-list #{@domain} --name")
+
+            if rc.exitstatus != 0
+                raise StandardError, "Error getting domain snapshots #{e}"
+            end
+
+            @snapshots = o.lines.map {|l| l.strip }
+            @snapshots.reject! {|s| s.empty? }
+
+            return [@snapshots, nil] if @snapshots.empty?
+
+            o, e, rc = Open3.capture3("#{virsh} snapshot-current #{@domain} --name")
+
+            @snapshots_current = o.strip if rc.exitstatus == 0
+
+            [@snapshots, @snapshots_current]
+        end
+
+        # Delete domain metadata snapshots.
+        # @param query[Boolean] update the snapshot list of the domain
+        def snapshots_delete(query = true)
+            snapshots(query)
+
+            delete = "#{virsh} snapshot-delete #{@domain} --metadata --snapshotname"
+
+            @snapshots.each do |snap|
+                Command.execute_log("#{delete} #{snap}")
+            end
+        end
+
+        # Redefine system snapshots on the destination libvirtd, the internal
+        # list needs to be bootstrapped by using the snapshots or
+        # snapshots_delete methods
+        #
+        # @param host[String] where the snapshots will be defined
+        # @param dir[String] VM folder path to look for the XML snapshot
+        #   metadata files
+        def snapshots_redefine(host, dir)
+            define  = "#{virsh_cmd(host)} snapshot-create --redefine #{@domain}"
+            current = "#{virsh_cmd(host)} snapshot-current #{@domain}"
+
+            @snapshots.each do |snap|
+                Command.execute_log("#{define} #{dir}/#{snap}.xml")
+            end
+
+            if @snapshots_current
+                Command.execute_log("#{current} #{@snapshots_current}")
+            end
+        end
+
+        # ---------------------------------------------------------------------
+        # VM disk interface
+        # ---------------------------------------------------------------------
+
+        # Gets the list of disks of a domain as an Array of [dev, path] pairs
+        #   - dev is the device name of the blk, e.g. vda, sdb...
+        #   - path of the file for the virtual disk
+        def disks
+            if !@disks
+                o, e, rc = Open3.capture3("#{virsh} domblklist #{@domain}")
+
+                if rc.exitstatus != 0
+                    raise StandardError, "Error getting domain disks #{e}"
+                end
+
+                @disks = o.lines[2..].map {|l| l.split }
+                @disks.reject! {|s| s.empty? }
+            end
+
+            @disks
+        end
+
+        # @return [Boolean] true if the disk (by path) is readonly
+        def readonly?(disk_path)
+            exist? "//domain/devices/disk[source/@file='#{disk_path}']/readonly"
+        end
+
+        # ---------------------------------------------------------------------
+        # Domain operations
+        # ---------------------------------------------------------------------
+
+        # Live migrate the domain to the target host (SHARED STORAGE variant)
+        # @param host[String] name of the target host
+        def live_migrate(host)
+            cmd =  "migrate --live #{ENV['MIGRATE_OPTIONS']} #{@domain}"
+            cmd << " #{virsh_uri(host)}"
+
+            virsh_retry(cmd, 'active block job', virsh_tries)
+        end
+
+        # Live migrate the domain to the target host (LOCAL STORAGE variant)
+        # @param host[String] name of the target host
+        # @param devs[Array] of the disks that will be copied
+        def live_migrate_disks(host, devs)
+            cmd =  "migrate --live #{ENV['MIGRATE_OPTIONS']} --suspend"
+            cmd << " #{@domain} #{virsh_uri(host)}"
+
+            if !devs.empty?
+                cmd << " --copy-storage-all --migrate-disks #{devs.join(',')}"
+            end
+
+            virsh_retry(cmd, 'active block job', virsh_tries)
+        end
+
+        # Basic domain operations (do not require additional parameters)
+        VIRSH_COMMANDS = [
+            'resume',
+            'destroy',
+            'undefine'
+        ]
+
+        VIRSH_COMMANDS.each do |command|
+            define_method(command.to_sym) do |host = nil|
+                Command.execute_log("#{virsh_cmd(host)} #{command} #{@domain}")
+            end
+        end
+
+        # ---------------------------------------------------------------------
+        # Private function helpers
+        # ---------------------------------------------------------------------
+        private
+
+        # @return [Integer] number of retries for virsh operations
+        def virsh_tries
+            vt = ENV['VIRSH_TRIES'].to_i
+            vt = 3 if vt == 0
+
+            vt
+        end
+
+        # @return [String] including the --connect attribute to run virsh
+        #   commands
+        def virsh_cmd(host = nil)
+            if host
+                "virsh --connect #{virsh_uri(host)}"
+            else
+                virsh
+            end
+        end
+
+        # @return [String] URI to contact the libvirtd daemon on a host
+        def virsh_uri(host)
+            proto = ENV['QEMU_PROTOCOL']
+            proto ||= 'qemu+ssh'
+
+            "#{proto}://#{host}/system"
+        end
+
+        # Retries a virsh operation if the returned error matches the provided
+        # one
+        #
+        # @param cmd[String] the virsh command arguments
+        # @param no_error_str[String] when stderr matches the string the
+        #   operation will be retried
+        # @param tries[Integer] number of tries
+        # @param secs[Integer] seconds to wait between tries
+        def virsh_retry(cmd, no_error_str, tries = 1, secs = 5)
+            out, err, rc = nil
+            tini = Time.now
+
+            loop do
+                tries = tries - 1
+
+                out, err, rc = Open3.capture3("#{virsh} #{cmd}")
+
+                break if rc.exitstatus == 0
+
+                match = err.match(/#{no_error_str}/)
+
+                break if tries == 0 || !match
+
+                sleep(secs)
+            end
+
+            if rc.exitstatus == 0
+                STDERR.puts "#{virsh} #{cmd} (#{Time.now-tini}s)"
+            else
+                STDERR.puts "Error executing: #{virsh} #{cmd} (#{Time.now-tini}s)"
+                STDERR.puts "\t[stdout]: #{out}" unless out.empty?
+                STDERR.puts "\t[stderr]: #{err}" unless err.empty?
+            end
+
+            [rc.exitstatus, out, err]
+        end
+    end
+
 #---------------------------------------------------------------------------
 # OpenNebula KVM Virtual Machine
 #---------------------------------------------------------------------------
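How the migrate script drives these classes can be summarized in a short, hypothetical session (domain name, host and image path are illustrative; the OpenNebula remotes libraries are assumed to be on the load path):

    require_relative 'opennebula_vm'

    include VirtualMachineManagerKVM

    vm = KvmDomain.new('one-42')

    # Inspect disks and drop snapshot metadata before migrating
    vm.disks.each {|dev, path| puts "#{dev} -> #{path}" }
    vm.snapshots_delete

    # Local storage variant: copy vda during migration, domain stays suspended
    rc, _out, err = vm.live_migrate_disks('node02', ['vda'])
    raise "migration failed: #{err}" if rc != 0

    vm.resume('node02')

    # qcow2 metadata through QemuImg
    img = QemuImg.new('/var/lib/one/datastores/0/42/disk.0')
    puts img['format'], img['virtual-size']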