1
0
mirror of https://github.com/OpenNebula/one.git synced 2024-12-22 13:33:52 +03:00

F #6057: Confine backup commands

- New TransferManager::Datastore class with confine helpers
- "Confinement" methods for backup file preparation and backup:
  * ionice/nice
  * systemd slice

- IONICE/NICE
  * Execute commands under a given nice and ionice (class 2)
  * The following variables can be set
    - RESTIC_NICE
    - RESTIC_IONICE
    - RSYNC_NICE
    - RSYNC_IONICE

- Systemd Slice
  * A user slice is created for each datastore that sets:
    - CPUQuota
    - IOReadIOPSMax
    - IOWriteIOPSMax

    This requires delegation of the io/cpu/cpuset controllers to oneadmin.
    Also, the VM folder needs to be local (e.g. not an NFS volume).
  * Commands are passed specific environment (e.g. SSH agent socket)
  * The following variables can be set:
    - RESTIC_MAX_RIOPS
    - RESTIC_MAX_WIOPS
    - RESTIC_CPU_QUOTA
    - RSYNC_MAX_RIOPS
    - RSYNC_MAX_WIOPS
    - RSYNC_CPU_QUOTA

The new interface is added to file (qcow2/shared/ssh) and ceph TM
drivers.

(cherry picked from commit 276f093073)
This commit is contained in:
Ruben S. Montero 2023-01-21 13:36:59 +01:00
parent 05d17aa6b3
commit 9489d0aac2
No known key found for this signature in database
GPG Key ID: A0CEA6FA880A1D87
7 changed files with 230 additions and 40 deletions

View File

@ -1925,6 +1925,7 @@ TM_FILES="src/tm_mad/tm_common.sh"
TM_LIB_FILES="src/tm_mad/lib/kvm.rb \
src/tm_mad/lib/tm_action.rb \
src/tm_mad/lib/backup_qcow2.rb \
src/tm_mad/lib/datastore.rb \
src/tm_mad/lib/backup.rb"
TM_SHARED_FILES="src/tm_mad/shared/clone \

View File

@ -57,6 +57,7 @@ require 'securerandom'
require 'pathname'
require_relative '../../tm/lib/tm_action'
require_relative '../../tm/lib/datastore'
# BACKUP host:remote_dir DISK_ID:..:DISK_ID deploy_id vmid dsid
@ -69,18 +70,28 @@ vmid = ARGV[3]
_dsid = ARGV[4]
vm_host = dir[0]
vm_dir = Pathname.new(dir[1]+'/backup/').cleanpath.to_s
ds = REXML::Document.new(ds_xml).root
rdir = Pathname.new(dir[1]).cleanpath.to_s
vm_dir = Pathname.new("#{rdir}/backup/").cleanpath.to_s
rsync_user = ds.elements['TEMPLATE/RSYNC_USER'].text
rsync_host = ds.elements['TEMPLATE/RSYNC_HOST'].text
base = ds.elements['BASE_PATH'].text
begin
ds = REXML::Document.new(ds_xml).root
if ds.elements['TEMPLATE/RSYNC_ARGS'].nil?
args = '-aS'
else
args = ds.elements['TEMPLATE/RSYNC_ARGS'].text
rsync_user = ds.elements['TEMPLATE/RSYNC_USER'].text
rsync_host = ds.elements['TEMPLATE/RSYNC_HOST'].text
base = ds.elements['BASE_PATH'].text
if ds.elements['TEMPLATE/RSYNC_ARGS'].nil?
args = '-aS'
else
args = ds.elements['TEMPLATE/RSYNC_ARGS'].text
end
ds = TransferManager::Datastore.new(:ds_xml => ds_xml)
rescue StandardError => e
STDERR.puts e.message
exit(1)
end
path = Pathname.new(base).cleanpath.to_s
@ -88,45 +99,44 @@ path = Pathname.new(base).cleanpath.to_s
backup_id = SecureRandom.hex[0, 6].to_s
backup_path = "#{path}/#{vmid}/#{backup_id}/"
#-------------------------------------------------------------------------------
# Compute backup total size
#-------------------------------------------------------------------------------
rc = TransferManager::Action.ssh('backup_size',
:host => vm_host,
:cmds => "du -sm #{vm_dir}",
:forward => true,
:nostdout => false)
backup_size = rc.stdout.split[0]
if rc.code != 0
exit rc.code
end
#-------------------------------------------------------------------------------
# Rsync backup files to server:
# 1. [rsync server] make backup path
# 2. [host] rsync files
# 2. [host] Compute backup total size & rsync files
#-------------------------------------------------------------------------------
rc = TransferManager::Action.ssh('make_dst_path',
:host => "#{rsync_user}@#{rsync_host}",
:cmds => "mkdir -p #{backup_path}")
:cmds => "mkdir -p #{backup_path}",
:nostderr => false,
:nostdout => false)
if rc != 0
exit rc
if rc.code != 0
STDERR.puts "Error making rsync dst path: #{rc.stdout} #{rc.stderr}"
exit(-1)
end
cmd = "rsync #{args} #{vm_dir}/ #{rsync_user}@#{rsync_host}:#{backup_path}"
script = <<~EOS
set -ex -o pipefail
BKSIZE=`du -sm #{vm_dir}`
#{ds.cmd_confinement(cmd, rdir, ['SSH_AUTH_SOCK'])} > /dev/null
echo $BKSIZE
EOS
rc = TransferManager::Action.ssh('backup',
:host => vm_host,
:cmds => cmd,
:cmds => script,
:forward => true,
:nostderr => false,
:nostdout => false)
if rc.code != 0
exit rc.code
if rc.code != 0 || rc.stdout.empty?
STDERR.puts "Error copying backup: #{rc.stdout} #{rc.stderr}"
exit(-1)
end
puts "#{backup_id} #{backup_size}"
puts "#{backup_id} #{rc.stdout.split[0]}"

View File

@ -55,6 +55,7 @@ require 'rexml/document'
require_relative '../lib/tm_action'
require_relative '../lib/kvm'
require_relative '../lib/datastore'
#-------------------------------------------------------------------------------
# Helper functions
@ -83,13 +84,14 @@ rdir = dir[1]
xml_doc = REXML::Document.new(vm_xml)
vm = xml_doc.root
ds = TransferManager::Datastore.new(:vm_xml => vm_xml)
bck_dir = "#{rdir}/backup"
expo_cmd = ''
conv_cmd = ''
clup_cmd = ''
# TODO: Make compression configurable. Does it interfere with deduplication?
vm.elements.each 'TEMPLATE/DISK' do |d|
did = d.elements['DISK_ID'].text
next unless disks.include? did
@ -110,9 +112,11 @@ vm.elements.each 'TEMPLATE/DISK' do |d|
draw = "#{bck_dir}/disk.#{did}.raw"
ddst = "#{bck_dir}/disk.#{did}.0"
expo_cmd << "#{cmd} export #{rbd_src} #{draw}\n"
ceph_cmd = "#{cmd} export #{rbd_src} #{draw}\n"
expo_cmd << ds.cmd_confinement(ceph_cmd, rdir)
conv_cmd << "qemu-img convert -m 4 -O qcow2 #{draw} #{ddst}\n"
qemu_cmd = "qemu-img convert -m 4 -O qcow2 #{draw} #{ddst}\n"
conv_cmd << ds.cmd_confinement(qemu_cmd, rdir)
clup_cmd << "rm -f #{draw}\n"

View File

@ -55,6 +55,7 @@ require 'rexml/document'
require_relative '../lib/tm_action'
require_relative '../lib/kvm'
require_relative '../lib/datastore'
# TODO: fsfreeze for each hypervisor based on VM_MAD
include TransferManager::KVM
@ -86,6 +87,8 @@ rdir = dir[1]
xml_doc = REXML::Document.new(vm_xml)
vm = xml_doc.root
ds = TransferManager::Datastore.new(:vm_xml => vm_xml)
bck_dir = "#{rdir}/backup"
snap_cmd = ''
@ -114,9 +117,12 @@ vm.elements.each 'TEMPLATE/DISK' do |d|
ddst = "#{bck_dir}/disk.#{did}.0"
snap_cmd << "#{cmd} snap create #{rbd_src}@backup\n"
expo_cmd << "#{cmd} export #{rbd_src}@backup #{draw}\n"
conv_cmd << "qemu-img convert -m 4 -O qcow2 #{draw} #{ddst}\n"
ceph_cmd = "#{cmd} export #{rbd_src}@backup #{draw}\n"
expo_cmd << ds.cmd_confinement(ceph_cmd, rdir)
qemu_cmd = "qemu-img convert -m 4 -O qcow2 #{draw} #{ddst}\n"
conv_cmd << ds.cmd_confinement(qemu_cmd, rdir)
clup_cmd << "#{cmd} snap rm #{rbd_src}@backup\n"
clup_cmd << "rm -f #{draw}\n"

159
src/tm_mad/lib/datastore.rb Normal file
View File

@ -0,0 +1,159 @@
#!/usr/bin/env ruby
# -------------------------------------------------------------------------- #
# Copyright 2002-2022, OpenNebula Project, OpenNebula Systems #
# #
# Licensed under the Apache License, Version 2.0 (the "License"); you may #
# not use this file except in compliance with the License. You may obtain #
# a copy of the License at #
# #
# http://www.apache.org/licenses/LICENSE-2.0 #
# #
# Unless required by applicable law or agreed to in writing, software #
# distributed under the License is distributed on an "AS IS" BASIS, #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and #
# limitations under the License. #
#--------------------------------------------------------------------------- #
require 'securerandom'
require 'pathname'
require 'opennebula'
require 'rexml/document'
module TransferManager

    # Helper class that wraps an OpenNebula Datastore. It gives access to the
    # datastore attributes and builds "confined" versions of shell commands
    # (systemd slice or nice/ionice) from the limits set in the datastore
    # template.
    class Datastore

        # Constants for wrapper commands
        SYSTEMD_RUN = 'systemd-run --user --quiet --pipe --collect --wait --same-dir'
        IONICE = 'ionice'
        NICE = 'nice'

        # File descriptor used for the lock that protects the slice file
        FD = 13

        # Initialize the OpenNebula object and get its information
        #
        # @param options [Hash]
        #   :client [OpenNebula::Client, nil] client to use, a new one is
        #     created when nil
        #   :ds_xml [String] XML of the datastore. When empty the datastore
        #     is looked up through :vm_xml
        #   :vm_xml [String] XML of the VM, the datastore ID is read from
        #     BACKUPS/BACKUP_CONFIG/LAST_DATASTORE_ID
        #   :ds_prefix [String] stored in the options, not used by this class
        #
        # @raise [RuntimeError] if the datastore ID cannot be found in the VM
        #   XML or the datastore information cannot be retrieved
        def initialize(options = {})
            @options = {
                :client => nil,
                :vm_xml => '',
                :ds_prefix => '',
                :ds_xml => ''
            }.merge!(options)

            @one = @options[:client]
            @one ||= OpenNebula::Client.new

            if @options[:ds_xml].empty?
                vm  = REXML::Document.new(@options[:vm_xml]).root
                did = vm ? vm.elements['BACKUPS/BACKUP_CONFIG/LAST_DATASTORE_ID'] : nil

                # Fail with a clear message instead of a NoMethodError on nil
                # (or a silent fallback to datastore 0 through nil.to_i)
                if did.nil? || did.text.to_s.strip.empty?
                    raise 'Cannot find BACKUPS/BACKUP_CONFIG/LAST_DATASTORE_ID ' \
                          'in the VM XML'
                end

                @ds = OpenNebula::Datastore.new_with_id(did.text.to_i, @one)
                rc  = @ds.info

                raise rc.message.to_s if OpenNebula.is_error?(rc)
            else
                xml = OpenNebula::XMLElement.build_xml(@options[:ds_xml],
                                                       'DATASTORE')
                @ds = OpenNebula::Datastore.new(xml, @one)
            end

            # Upper-cased driver name, used as prefix of the limit attributes
            # (e.g. RSYNC_NICE, RESTIC_CPU_QUOTA)
            @mad = self['DS_MAD', ''].upcase
        end

        # Get a datastore attribute
        #
        # @param xpath [String] path of the attribute in the datastore
        # @param default [Object] value returned when the attribute is not
        #   set or is the empty string
        #
        # @return [Object] the attribute value or default
        def [](xpath, default = '')
            v = @ds[xpath]

            return default if !v || v == ''

            v
        end

        # Confine the datastore command. It will try first to use systemd
        # slices and then nice/ionice
        #
        # @param cmd [String] the command to execute
        # @param vm_dir [String] path used to set IO limits on the system ds
        #   block device
        # @param env_a [Array<String>] names of the environment variables
        #   passed to the command when it runs in a systemd slice
        #
        # @return [String] the confined command, or cmd itself when no limit
        #   is set for the datastore
        def cmd_confinement(cmd, vm_dir, env_a = [])
            ccmd = systemd_cmd(cmd, vm_dir, env_a)

            # systemd_cmd returns cmd untouched when no slice limit is set
            return ccmd if ccmd != cmd

            nice_cmd(cmd)
        end

        # Confine the datastore command in a systemd slice.
        #
        # @param cmd [String] the command to execute
        # @param vm_dir [String] path used to set IO limits on the system ds
        #   block device
        # @param env_a [Array<String>] names of the environment variables
        #   passed to the command with --setenv
        #
        # @return [String] shell snippet that writes the slice file and runs
        #   cmd in it, or cmd itself when no limit is set
        #
        # @raise [ArgumentError] if a limit is not a decimal integer
        #
        # The slice can set the following resources:
        #   - CPUQuota
        #   - IOReadIOPSMax
        #   - IOWriteIOPSMax
        #
        # This requires delegation of io/cpu/cpuset controllers to oneadmin
        # vm_dir needs to be local (e.g. not an NFS volume)
        #
        # Access to user systemd requires:
        #   export XDG_RUNTIME_DIR="/run/user/$UID"
        #   export DBUS_SESSION_BUS_ADDRESS="unix:path=${XDG_RUNTIME_DIR}/bus"
        def systemd_cmd(cmd, vm_dir, env_a)
            return cmd if @mad.empty?

            riops = int_attr('MAX_RIOPS')
            wiops = int_attr('MAX_WIOPS')
            cpuq  = int_attr('CPU_QUOTA')

            return cmd if riops == -1 && wiops == -1 && cpuq == -1

            bpath = Pathname.new(vm_dir).cleanpath

            # The variables are expanded by the shell that runs the snippet
            env_opts = env_a.map {|e| " --setenv=\"#{e}=$#{e}\"" }.join

            # An unset limit is written as an empty assignment
            cpu_limit  = cpuq == -1 ? '' : "#{cpuq}%"
            rio_limit  = riops == -1 ? '' : "#{bpath} #{riops}"
            wio_limit  = wiops == -1 ? '' : "#{bpath} #{wiops}"

            # Create a slice for backup processes (per backup datastore)
            spath = '~/.config/systemd/user'
            sname = "backup.#{@ds.id}.slice"

            slice = <<~EOS
                [Slice]
                CPUQuota=#{cpu_limit}
                IOReadIOPSMax=#{rio_limit}
                IOWriteIOPSMax=#{wio_limit}
            EOS

            # The slice file is rewritten under a non-blocking lock, the
            # subshell exits with 1 if the lock cannot be acquired
            <<~EOS
                mkdir -p #{spath}
                (flock -n #{FD} || exit 1
                echo '#{slice}' > #{spath}/#{sname}
                systemctl --user daemon-reload
                ) #{FD}> #{spath}/.lock
                #{SYSTEMD_RUN} #{env_opts} --slice=#{sname} #{cmd}
            EOS
        end

        # Generate a "nice" command
        #
        # @param cmd [String] the command to execute
        #
        # @return [String] cmd prefixed with nice and/or ionice (class 2), or
        #   cmd itself when neither value is set
        #
        # @raise [ArgumentError] if a value is not a decimal integer
        def nice_cmd(cmd)
            return cmd if @mad.empty?

            ionice = int_attr('IONICE')
            nice   = int_attr('NICE')

            return cmd if ionice == -1 && nice == -1

            wrappers = []
            wrappers << "#{NICE} -n #{nice}" if nice != -1
            wrappers << "#{IONICE} -c2 -n#{ionice}" if ionice != -1

            "#{wrappers.join(' ')} #{cmd}"
        end

        private

        # Read an integer limit from the datastore template
        #
        # @param suffix [String] attribute name without the driver prefix
        #   (e.g. 'NICE' for RSYNC_NICE)
        #
        # @return [Integer] the attribute value, -1 when it is not set
        #
        # @raise [ArgumentError] if the value is not a decimal integer
        def int_attr(suffix)
            name = "#{@mad}_#{suffix}"
            raw  = self["TEMPLATE/#{name}", '-1'].to_s

            # Explicit base 10: without it Integer() applies prefix parsing
            # and a value like "010" is read as octal (8)
            Integer(raw, 10)
        rescue ArgumentError
            raise ArgumentError,
                  "Invalid integer value '#{raw}' for datastore attribute #{name}"
        end

    end

end

View File

@ -52,6 +52,7 @@ end
$LOAD_PATH << RUBY_LIB_LOCATION
require_relative '../lib/tm_action'
require_relative '../lib/datastore'
require 'rexml/document'
@ -71,6 +72,10 @@ rdir = dir[1]
bck_dir = "#{rdir}/backup"
qcow2_util = '/var/tmp/one/tm/lib/backup_qcow2.rb'
qcow2_cmd = "#{qcow2_util} -d \"#{disks}\" -x #{bck_dir}/vm.xml -p #{rdir}"
ds = TransferManager::Datastore.new(:vm_xml => vm_xml)
cmd = ds.cmd_confinement(qcow2_cmd, rdir)
pre_script = <<~EOS
set -ex -o pipefail
@ -87,7 +92,7 @@ pre_script = <<~EOS
# -------------------
# Convert and flatten
# -------------------
#{qcow2_util} -d "#{disks}" -x #{bck_dir}/vm.xml -p #{rdir}
#{cmd}
EOS
rc = TransferManager::Action.ssh('prebackup',

View File

@ -55,6 +55,7 @@ require 'rexml/document'
require_relative '../lib/tm_action'
require_relative '../lib/kvm'
require_relative '../lib/datastore'
# TODO: fsfreeze for each hypervisor based on VM_MAD
include TransferManager::KVM
@ -77,6 +78,10 @@ tmp_dir = "#{rdir}/tmp"
bck_dir = "#{rdir}/backup"
qcow2_util = '/var/tmp/one/tm/lib/backup_qcow2.rb'
qcow2_cmd = "#{qcow2_util} -l -d \"#{disks}\" -x #{bck_dir}/vm.xml -p #{rdir}"
ds = TransferManager::Datastore.new(:vm_xml => vm_xml)
cmd = ds.cmd_confinement(qcow2_cmd, rdir)
pre_script = <<~EOS
set -ex -o pipefail
@ -97,7 +102,7 @@ pre_script = <<~EOS
# --------------------------------------
# Create backup live
# --------------------------------------
#{qcow2_util} -l -d "#{disks}" -x #{bck_dir}/vm.xml -p #{rdir}
#{cmd}
EOS
rc = TransferManager::Action.ssh('prebackup_live',