mirror of
https://github.com/OpenNebula/one.git
synced 2025-03-25 02:50:08 +03:00
F #6030: Cancel backup in pre-backup phase
* Add a new Cancel module to the backup_qcow2.rb script. This module is a helper to cancel ongoing backup operations in the prebackup phase. * Check libvirt for active backup domain operations in order to stop them, and remove the @ongoing flag. * Trap SIGTERM in the backup_qcow2, prebackup and prebackup_live scripts. * Cleaner backup function in onevm_exec.rb (reuse post array). Co-authored-by: Michal Opala <mopala@opennebula.io>
This commit is contained in:
parent
43515c1fe4
commit
f5d6f11730
@ -64,12 +64,11 @@ vm_host = dir[0]
|
||||
vm_dir = Pathname.new(dir[1]).cleanpath.to_s
|
||||
|
||||
begin
|
||||
# Kill the backup script.
|
||||
# Kill the pre/backup/_live scripts.
|
||||
script = <<~EOS
|
||||
set -x -e -o pipefail; shopt -qs failglob
|
||||
(ps --no-headers -o pid,cmd -C ruby \
|
||||
| awk '$0 !~ "prebackup(_live)? .*#{vm_uuid} " { print }' \
|
||||
| awk '$0 ~ "backup .*#{vm_uuid} " { print $1 } END { print "\\0" }' || :) \\
|
||||
| awk '$0 ~ "(pre)?backup(_live)? .*#{vm_uuid} " { print $1 } END { print "\\0" }' || :) \\
|
||||
| (read -d '' PIDS
|
||||
[[ -n "$PIDS" ]] || exit 0 # empty
|
||||
[[ -z "${PIDS//[[:space:][:digit:]]/}" ]] || exit -1 # !integers
|
||||
@ -78,7 +77,7 @@ begin
|
||||
|
||||
rc = LocalCommand.run '/bin/bash -s', nil, script
|
||||
|
||||
raise StandardError, "Unable to stop rsync backup action: #{rc.stderr}" \
|
||||
raise StandardError, "Unable to stop rsync backup actions: #{rc.stderr}" \
|
||||
if rc.code != 0
|
||||
|
||||
# Kill the rsync process. This step is a failsafe in case the TERM signal handler
|
||||
|
@ -37,6 +37,7 @@ require_relative 'kvm'
|
||||
#
|
||||
# BDRV_MAX_REQUEST is the limit for the size of qemu-io operations
|
||||
#-------------------------------------------------------------------------------
|
||||
CMD_ARGV = [$PROGRAM_NAME] + ARGV
|
||||
LOG_FILE = nil
|
||||
QEMU_IO_OPEN = '-t none -i native -o driver=qcow2'
|
||||
IO_ASYNC = false
|
||||
@ -100,6 +101,69 @@ module Command
|
||||
|
||||
end
|
||||
|
||||
#---------------------------------------------------------------------------
|
||||
# Helper module to kill running processes
|
||||
#---------------------------------------------------------------------------
|
||||
module Cancel

    # `cmd` and `log` are not defined in this module; they are expected to be
    # provided by Command (defined elsewhere in the file).
    extend Command

    # Parses `ps --no-headers -o pid,cmd` output and collects the PIDs whose
    # command line is accepted by the given block. The current process is
    # always removed from the result.
    #
    # @param ps_output [String] raw output of ps, one "<pid> <cmd>" row per line
    # @yieldparam command [String] command line of the row ('' when missing)
    # @yieldreturn [Boolean] true to keep the PID of that row
    # @return [Array<Integer>] selected PIDs, without Process.pid
    def self.select_pids(ps_output)
        ps_output.lines.each_with_object([]) do |line, acc|
            pid, command = line.strip.split(' ', 2)

            next if pid.nil? # blank line

            # A row without a command column yields nil; match it as ''
            # instead of failing with NoMethodError.
            acc << pid.to_i if yield(command.to_s)
        end - [Process.pid]
    end

    # Builds the pattern used to locate a task of this program working on the
    # given file. Both parts are escaped so that regexp metacharacters in the
    # script name or in the path ('.', '+', ...) are matched literally.
    #
    # @param vxml [String] path (or any text) expected after the program name
    # @return [Regexp]
    def self.task_pattern(vxml)
        /#{Regexp.escape($PROGRAM_NAME)}.*#{Regexp.escape(vxml.to_s)}/
    end

    private_class_method :select_pids, :task_pattern

    # Looks for another ruby process whose command line matches +select+.
    #
    # @param select [Regexp] pattern applied to the command line of each
    #   process listed by `ps -C ruby`; defaults to the (literal) name of the
    #   running program
    # @return [Integer, nil] PID of the matching process, nil if there is none
    # @raise [StandardError] if more than one process (other than the current
    #   one) matches
    def self.find_task(select = Regexp.new(Regexp.escape($PROGRAM_NAME)))
        out  = cmd('ps', '--no-headers -o pid,cmd -C ruby')
        pids = select_pids(out) {|command| command.match?(select) }

        raise StandardError, 'Too many tasks found, ambiguous result' if pids.size > 1

        pids.first
    end

    # Lists the direct children of +ppid+, leaving out those whose command
    # line matches +reject+.
    #
    # @param ppid [Integer] PID of the parent process
    # @param reject [Regexp] children matching this pattern are not returned
    #   (by default blockcommit and snapshot-delete commands; presumably they
    #   must not be interrupted - confirm with the callers)
    # @return [Array<Integer>] PIDs of the children; empty if ps fails
    def self.find_subtasks(ppid, reject = / (blockcommit|snapshot-delete) /)
        begin
            out = cmd('ps', "--no-headers -o pid,cmd --ppid '#{ppid}'")
        rescue StandardError
            # A failing ps is treated as "no children"
            return []
        end

        select_pids(out) {|command| !command.match?(reject) }
    end

    # Tells whether another instance of this program is working on +vxml+.
    #
    # @param vxml [String] text expected in the command line after the
    #   program name
    # @return [Boolean]
    # @raise [StandardError] if more than one instance matches
    def self.running?(vxml)
        !find_task(task_pattern(vxml)).nil?
    end

    # Sends +signal+ to every subtask (see find_subtasks) of the instance of
    # this program that is working on +vxml+.
    #
    # @param vxml [String] text expected in the command line after the
    #   program name
    # @param signal [Symbol, String, Integer] signal accepted by Process.kill
    # @return [Array<Integer>] PIDs of the subtasks that were signalled (or
    #   had already exited)
    # @raise [StandardError] if the parent task is not running or is ambiguous
    def self.killall(vxml, signal = :TERM)
        ppid = find_task(task_pattern(vxml))

        raise StandardError, 'Parent task not running' if ppid.nil?

        find_subtasks(ppid).each do |pid|
            log("[KIL]: sending #{signal} to pid=#{pid}")

            begin
                Process.kill(signal, pid)
            rescue Errno::ESRCH
                # The subtask exited between the ps listing and the kill;
                # there is nothing left to stop, keep going with the rest.
                next
            end
        end
    end

end
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
# Setup an NBD server to pull changes, an optional map can be provided
|
||||
#-------------------------------------------------------------------------------
|
||||
@ -339,7 +403,7 @@ class KVMDomain
|
||||
include TransferManager::KVM
|
||||
include Command
|
||||
|
||||
attr_reader :parent_id, :backup_id, :checkpoint
|
||||
attr_reader :parent_id, :backup_id, :checkpoint, :tmp_dir, :bck_dir
|
||||
|
||||
#---------------------------------------------------------------------------
|
||||
# @param vm[REXML::Document] OpenNebula XML VM information
|
||||
@ -383,7 +447,6 @@ class KVMDomain
|
||||
@socket = "#{opts[:vm_dir]}/backup.socket"
|
||||
|
||||
# State variables for domain operations
|
||||
@ongoing = false
|
||||
@frozen = nil
|
||||
end
|
||||
|
||||
@ -835,19 +898,25 @@ class KVMDomain
|
||||
opts[:checkpointxml] = check_path if checkpoint
|
||||
|
||||
cmd("#{virsh} backup-begin", @dom, opts)
|
||||
|
||||
@ongoing = true
|
||||
end
|
||||
|
||||
#---------------------------------------------------------------------------
|
||||
# Stop an ongoing Backup operation on the domain
|
||||
#---------------------------------------------------------------------------
|
||||
def stop_backup
|
||||
return unless @ongoing
|
||||
out = cmd("#{virsh} domjobinfo", @dom, {})
|
||||
|
||||
# Parse domjobinfo's output.
|
||||
job = out.lines.each_with_object({}) do |item, acc|
|
||||
key, value = item.split(':', 2)
|
||||
acc[key.strip] = value.strip unless value.nil?
|
||||
end
|
||||
|
||||
# Check if there is an ongoing backup operation.
|
||||
return unless job.key?('Job type') && job['Job type'] != 'None'
|
||||
return unless job['Operation'] == 'Backup'
|
||||
|
||||
cmd("#{virsh} domjobabort", @dom, {})
|
||||
ensure
|
||||
@ongoing = false
|
||||
end
|
||||
|
||||
end
|
||||
@ -890,6 +959,27 @@ begin
|
||||
exit(0)
|
||||
end
|
||||
|
||||
#---------------------------------------------------------------------------
|
||||
# Cancel logic. When SIGTERM is received it kills all subtasks and
|
||||
# terminates current backup operation
|
||||
#---------------------------------------------------------------------------
|
||||
pipe_r, pipe_w = IO.pipe
|
||||
|
||||
Thread.new do
|
||||
loop do
|
||||
rs, _ws, _es = IO.select([pipe_r])
|
||||
break if rs[0] == pipe_r
|
||||
end
|
||||
|
||||
Cancel.killall(vxml) if Cancel.running?(vxml)
|
||||
|
||||
exit(-1)
|
||||
end
|
||||
|
||||
Signal.trap(:TERM) do
|
||||
pipe_w.write 'W'
|
||||
end
|
||||
|
||||
#---------------------------------------------------------------------------
|
||||
# Backup operation
|
||||
# - (live - full) Creates a snapshot to copy the disks via qemu-convert
|
||||
|
@ -75,6 +75,42 @@ bck_dir = "#{rdir}/backup"
|
||||
qcow2_util = '/var/tmp/one/tm/lib/backup_qcow2.rb'
|
||||
qcow2_cmd = "#{qcow2_util} -d \"#{disks}\" -x #{bck_dir}/vm.xml -p #{rdir}"
|
||||
|
||||
pipe_r, pipe_w = IO.pipe
|
||||
|
||||
Thread.new do
|
||||
loop do
|
||||
rs, _ws, _es = IO.select([pipe_r])
|
||||
break if rs[0] == pipe_r
|
||||
end
|
||||
|
||||
script = <<~EOS
|
||||
set -x -e -o pipefail; shopt -qs failglob
|
||||
(ps --no-headers -o pid,cmd -C ruby \
|
||||
| awk '$0 ~ "#{qcow2_util} .* -p #{rdir}" { print $1 } END { print "\\0" }' || :) \\
|
||||
| (read -d '' PIDS
|
||||
[[ -n "$PIDS" ]] || exit 0 # empty
|
||||
[[ -z "${PIDS//[[:space:][:digit:]]/}" ]] || exit -1 # !integers
|
||||
kill -s TERM $PIDS)
|
||||
EOS
|
||||
|
||||
TransferManager::Action.ssh 'prebackup_cancel',
|
||||
:host => rhost,
|
||||
:cmds => script,
|
||||
:nostdout => true,
|
||||
:nostderr => false
|
||||
|
||||
STDERR.puts "Prebackup cancelled: #{bck_dir}"
|
||||
STDERR.flush
|
||||
|
||||
# Suppress "`read': stream closed in another thread (IOError)".
|
||||
STDOUT.reopen IO::NULL
|
||||
STDERR.reopen IO::NULL
|
||||
end
|
||||
|
||||
Signal.trap(:TERM) do
|
||||
pipe_w.write 'W'
|
||||
end
|
||||
|
||||
ds = TransferManager::Datastore.new(:vm_xml => vm_xml)
|
||||
cmd = ds.cmd_confinement(qcow2_cmd, rdir)
|
||||
|
||||
|
@ -81,6 +81,42 @@ bck_dir = "#{rdir}/backup"
|
||||
qcow2_util = '/var/tmp/one/tm/lib/backup_qcow2.rb'
|
||||
qcow2_cmd = "#{qcow2_util} -l -d \"#{disks}\" -x #{bck_dir}/vm.xml -p #{rdir}"
|
||||
|
||||
pipe_r, pipe_w = IO.pipe
|
||||
|
||||
Thread.new do
|
||||
loop do
|
||||
rs, _ws, _es = IO.select([pipe_r])
|
||||
break if rs[0] == pipe_r
|
||||
end
|
||||
|
||||
script = <<~EOS
|
||||
set -x -e -o pipefail; shopt -qs failglob
|
||||
(ps --no-headers -o pid,cmd -C ruby \
|
||||
| awk '$0 ~ "#{qcow2_util} .* -p #{rdir}" { print $1 } END { print "\\0" }' || :) \\
|
||||
| (read -d '' PIDS
|
||||
[[ -n "$PIDS" ]] || exit 0 # empty
|
||||
[[ -z "${PIDS//[[:space:][:digit:]]/}" ]] || exit -1 # !integers
|
||||
kill -s TERM $PIDS)
|
||||
EOS
|
||||
|
||||
TransferManager::Action.ssh 'prebackup_live_cancel',
|
||||
:host => rhost,
|
||||
:cmds => script,
|
||||
:nostdout => true,
|
||||
:nostderr => false
|
||||
|
||||
STDERR.puts "Live prebackup cancelled: #{bck_dir}"
|
||||
STDERR.flush
|
||||
|
||||
# Suppress "`read': stream closed in another thread (IOError)".
|
||||
STDOUT.reopen IO::NULL
|
||||
STDERR.reopen IO::NULL
|
||||
end
|
||||
|
||||
Signal.trap(:TERM) do
|
||||
pipe_w.write 'W'
|
||||
end
|
||||
|
||||
ds = TransferManager::Datastore.new(:vm_xml => vm_xml)
|
||||
cmd = ds.cmd_confinement(qcow2_cmd, rdir)
|
||||
|
||||
|
@ -1335,31 +1335,7 @@ class ExecDriver < VirtualMachineDriver
|
||||
|
||||
vm_xml = xml_data.elements['/VMM_DRIVER_ACTION_DATA/VM']
|
||||
|
||||
# Backup operation steps
|
||||
# TODO: failover steps
|
||||
steps = [
|
||||
# Generate backup files for VM disks
|
||||
{
|
||||
:driver => :tm,
|
||||
:action => pre_name,
|
||||
:parameters => pre_tm,
|
||||
:stdin => vm_xml
|
||||
},
|
||||
# Upload backup files to repo
|
||||
{
|
||||
:driver => :ds,
|
||||
:action => :backup,
|
||||
:parameters => ds_command,
|
||||
:stdin => xml_data.elements['DATASTORE'].to_s,
|
||||
:fail_actions => [
|
||||
{
|
||||
:driver => :tm,
|
||||
:action => post_name,
|
||||
:parameters => post_tm,
|
||||
:stdin => vm_xml
|
||||
}
|
||||
]
|
||||
},
|
||||
cleanup_steps = [
|
||||
# Cleanup backup and tmp files
|
||||
{
|
||||
:driver => :tm,
|
||||
@ -1369,6 +1345,27 @@ class ExecDriver < VirtualMachineDriver
|
||||
}
|
||||
]
|
||||
|
||||
# Backup operation steps
|
||||
# TODO: failover steps
|
||||
steps = [
|
||||
# Generate backup files for VM disks
|
||||
{
|
||||
:driver => :tm,
|
||||
:action => pre_name,
|
||||
:parameters => pre_tm,
|
||||
:stdin => vm_xml,
|
||||
:fail_actions => cleanup_steps
|
||||
},
|
||||
# Upload backup files to repo
|
||||
{
|
||||
:driver => :ds,
|
||||
:action => :backup,
|
||||
:parameters => ds_command,
|
||||
:stdin => xml_data.elements['DATASTORE'].to_s,
|
||||
:fail_actions => cleanup_steps
|
||||
}
|
||||
] + cleanup_steps
|
||||
|
||||
action.run(steps)
|
||||
end
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user