From 276f093073cb732e011c63d3fda381a0b674d857 Mon Sep 17 00:00:00 2001 From: "Ruben S. Montero" Date: Sat, 21 Jan 2023 13:36:59 +0100 Subject: [PATCH] F #6057: Confine backup commands - New TransferManager::Datastore class with confine helpers - "Confinement" methods for backup file preparation and backup: * ionice/nice * systemd slice - IONICE/NICE * Execute commands under a given nice and ionice (class 2) * The following variables can be set: - RESTIC_NICE - RESTIC_IONICE - RSYNC_NICE - RSYNC_IONICE - Systemd Slice * A user slice is created for each datastore that sets: - CPUQuota - IOReadIOPSMax - IOWriteIOPSMax This requires delegation of io/cpu/cpuset controllers to oneadmin. Also, the VM folder needs to be local (e.g. not an NFS volume). * Commands are passed a specific environment (e.g. SSH agent socket) * The following variables can be set: - RESTIC_MAX_RIOPS - RESTIC_MAX_WIOPS - RESTIC_CPU_QUOTA - RSYNC_MAX_RIOPS - RSYNC_MAX_WIOPS - RSYNC_CPU_QUOTA The new interface is added to file (qcow2/shared/ssh) and ceph TM drivers. 
--- install.sh | 1 + src/datastore_mad/remotes/rsync/backup | 76 +++++++----- src/tm_mad/ceph/prebackup | 10 +- src/tm_mad/ceph/prebackup_live | 10 +- src/tm_mad/lib/datastore.rb | 159 +++++++++++++++++++++++++ src/tm_mad/qcow2/prebackup | 7 +- src/tm_mad/qcow2/prebackup_live | 7 +- 7 files changed, 230 insertions(+), 40 deletions(-) create mode 100644 src/tm_mad/lib/datastore.rb diff --git a/install.sh b/install.sh index f79bc8da67..1aabc62daa 100755 --- a/install.sh +++ b/install.sh @@ -1925,6 +1925,7 @@ TM_FILES="src/tm_mad/tm_common.sh" TM_LIB_FILES="src/tm_mad/lib/kvm.rb \ src/tm_mad/lib/tm_action.rb \ src/tm_mad/lib/backup_qcow2.rb \ + src/tm_mad/lib/datastore.rb \ src/tm_mad/lib/backup.rb" TM_SHARED_FILES="src/tm_mad/shared/clone \ diff --git a/src/datastore_mad/remotes/rsync/backup b/src/datastore_mad/remotes/rsync/backup index 2b18a13788..738c21f5ae 100755 --- a/src/datastore_mad/remotes/rsync/backup +++ b/src/datastore_mad/remotes/rsync/backup @@ -57,6 +57,7 @@ require 'securerandom' require 'pathname' require_relative '../../tm/lib/tm_action' +require_relative '../../tm/lib/datastore' # BACKUP host:remote_dir DISK_ID:..:DISK_ID deploy_id vmid dsid @@ -69,18 +70,28 @@ vmid = ARGV[3] _dsid = ARGV[4] vm_host = dir[0] -vm_dir = Pathname.new(dir[1]+'/backup/').cleanpath.to_s -ds = REXML::Document.new(ds_xml).root +rdir = Pathname.new(dir[1]).cleanpath.to_s +vm_dir = Pathname.new("#{rdir}/backup/").cleanpath.to_s -rsync_user = ds.elements['TEMPLATE/RSYNC_USER'].text -rsync_host = ds.elements['TEMPLATE/RSYNC_HOST'].text -base = ds.elements['BASE_PATH'].text +begin + ds = REXML::Document.new(ds_xml).root -if ds.elements['TEMPLATE/RSYNC_ARGS'].nil? - args = '-aS' -else - args = ds.elements['TEMPLATE/RSYNC_ARGS'].text + rsync_user = ds.elements['TEMPLATE/RSYNC_USER'].text + rsync_host = ds.elements['TEMPLATE/RSYNC_HOST'].text + + base = ds.elements['BASE_PATH'].text + + if ds.elements['TEMPLATE/RSYNC_ARGS'].nil? 
+ args = '-aS' + else + args = ds.elements['TEMPLATE/RSYNC_ARGS'].text + end + + ds = TransferManager::Datastore.new(:ds_xml => ds_xml) +rescue StandardError => e + STDERR.puts e.message + exit(1) end path = Pathname.new(base).cleanpath.to_s @@ -88,45 +99,44 @@ path = Pathname.new(base).cleanpath.to_s backup_id = SecureRandom.hex[0, 6].to_s backup_path = "#{path}/#{vmid}/#{backup_id}/" -#------------------------------------------------------------------------------- -# Compute backup total size -#------------------------------------------------------------------------------- - -rc = TransferManager::Action.ssh('backup_size', - :host => vm_host, - :cmds => "du -sm #{vm_dir}", - :forward => true, - :nostdout => false) - -backup_size = rc.stdout.split[0] - -if rc.code != 0 - exit rc.code -end - #------------------------------------------------------------------------------- # Rsync backup files to server: # 1. [rsync server] make backup path -# 2. [host] rsync files +# 2. [host] Compute backup total size & rsync files #------------------------------------------------------------------------------- rc = TransferManager::Action.ssh('make_dst_path', :host => "#{rsync_user}@#{rsync_host}", - :cmds => "mkdir -p #{backup_path}") + :cmds => "mkdir -p #{backup_path}", + :nostderr => false, + :nostdout => false) -if rc != 0 - exit rc +if rc.code != 0 + STDERR.puts "Error making rsync dst path: #{rc.stdout} #{rc.stderr}" + exit(-1) end cmd = "rsync #{args} #{vm_dir}/ #{rsync_user}@#{rsync_host}:#{backup_path}" +script = <<~EOS + set -ex -o pipefail + + BKSIZE=`du -sm #{vm_dir}` + + #{ds.cmd_confinement(cmd, rdir, ['SSH_AUTH_SOCK'])} > /dev/null + + echo $BKSIZE +EOS + rc = TransferManager::Action.ssh('backup', :host => vm_host, - :cmds => cmd, + :cmds => script, :forward => true, + :nostderr => false, :nostdout => false) -if rc.code != 0 - exit rc.code +if rc.code != 0 || rc.stdout.empty? 
+ STDERR.puts "Error copying backup: #{rc.stdout} #{rc.stderr}" + exit(-1) end -puts "#{backup_id} #{backup_size}" +puts "#{backup_id} #{rc.stdout.split[0]}" diff --git a/src/tm_mad/ceph/prebackup b/src/tm_mad/ceph/prebackup index 791763da82..1ff265aba6 100755 --- a/src/tm_mad/ceph/prebackup +++ b/src/tm_mad/ceph/prebackup @@ -55,6 +55,7 @@ require 'rexml/document' require_relative '../lib/tm_action' require_relative '../lib/kvm' +require_relative '../lib/datastore' #------------------------------------------------------------------------------- # Helper functions @@ -83,13 +84,14 @@ rdir = dir[1] xml_doc = REXML::Document.new(vm_xml) vm = xml_doc.root +ds = TransferManager::Datastore.new(:vm_xml => vm_xml) + bck_dir = "#{rdir}/backup" expo_cmd = '' conv_cmd = '' clup_cmd = '' -# TODO: Make compression configurable. Does it intefere with deduplication? vm.elements.each 'TEMPLATE/DISK' do |d| did = d.elements['DISK_ID'].text next unless disks.include? did @@ -110,9 +112,11 @@ vm.elements.each 'TEMPLATE/DISK' do |d| draw = "#{bck_dir}/disk.#{did}.raw" ddst = "#{bck_dir}/disk.#{did}.0" - expo_cmd << "#{cmd} export #{rbd_src} #{draw}\n" + ceph_cmd = "#{cmd} export #{rbd_src} #{draw}\n" + expo_cmd << ds.cmd_confinement(ceph_cmd, rdir) - conv_cmd << "qemu-img convert -m 4 -O qcow2 #{draw} #{ddst}\n" + qemu_cmd = "qemu-img convert -m 4 -O qcow2 #{draw} #{ddst}\n" + conv_cmd << ds.cmd_confinement(qemu_cmd, rdir) clup_cmd << "rm -f #{draw}\n" diff --git a/src/tm_mad/ceph/prebackup_live b/src/tm_mad/ceph/prebackup_live index d97197d4e5..b195f8a1e1 100755 --- a/src/tm_mad/ceph/prebackup_live +++ b/src/tm_mad/ceph/prebackup_live @@ -55,6 +55,7 @@ require 'rexml/document' require_relative '../lib/tm_action' require_relative '../lib/kvm' +require_relative '../lib/datastore' # TODO: fsfreeze for each hypervisor based on VM_MAD include TransferManager::KVM @@ -86,6 +87,8 @@ rdir = dir[1] xml_doc = REXML::Document.new(vm_xml) vm = xml_doc.root +ds = 
TransferManager::Datastore.new(:vm_xml => vm_xml) + bck_dir = "#{rdir}/backup" snap_cmd = '' @@ -114,9 +117,12 @@ vm.elements.each 'TEMPLATE/DISK' do |d| ddst = "#{bck_dir}/disk.#{did}.0" snap_cmd << "#{cmd} snap create #{rbd_src}@backup\n" - expo_cmd << "#{cmd} export #{rbd_src}@backup #{draw}\n" - conv_cmd << "qemu-img convert -m 4 -O qcow2 #{draw} #{ddst}\n" + ceph_cmd = "#{cmd} export #{rbd_src}@backup #{draw}\n" + expo_cmd << ds.cmd_confinement(ceph_cmd, rdir) + + qemu_cmd = "qemu-img convert -m 4 -O qcow2 #{draw} #{ddst}\n" + conv_cmd << ds.cmd_confinement(qemu_cmd, rdir) clup_cmd << "#{cmd} snap rm #{rbd_src}@backup\n" clup_cmd << "rm -f #{draw}\n" diff --git a/src/tm_mad/lib/datastore.rb b/src/tm_mad/lib/datastore.rb new file mode 100644 index 0000000000..a8be57d793 --- /dev/null +++ b/src/tm_mad/lib/datastore.rb @@ -0,0 +1,159 @@ +#!/usr/bin/env ruby + +# -------------------------------------------------------------------------- # +# Copyright 2002-2022, OpenNebula Project, OpenNebula Systems # +# # +# Licensed under the Apache License, Version 2.0 (the "License"); you may # +# not use this file except in compliance with the License. You may obtain # +# a copy of the License at # +# # +# http://www.apache.org/licenses/LICENSE-2.0 # +# # +# Unless required by applicable law or agreed to in writing, software # +# distributed under the License is distributed on an "AS IS" BASIS, # +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # +# See the License for the specific language governing permissions and # +# limitations under the License. # +#--------------------------------------------------------------------------- # +require 'securerandom' +require 'pathname' +require 'opennebula' +require 'rexml/document' + +module TransferManager + + # Generic class that represent an OpenNebula Datastore. 
This helper class + # lets you get datastore attributes + class Datastore + + # Constants for wrapper commands + SYSTEMD_RUN = 'systemd-run --user --quiet --pipe --collect --wait --same-dir' + IONICE = 'ionice' + NICE = 'nice' + + # file descriptor for lock + FD = 13 + + # Initialize OpenNebula object and get its information + def initialize(options = {}) + @options={ + :client => nil, + :vm_xml => '', + :ds_prefix => '', + :ds_xml => '' + }.merge!(options) + + @one = @options[:client] + @one ||= OpenNebula::Client.new + + if @options[:ds_xml].empty? + vm = REXML::Document.new(@options[:vm_xml]).root + did = vm.elements['BACKUPS/BACKUP_CONFIG/LAST_DATASTORE_ID'] + + @ds = OpenNebula::Datastore.new_with_id(did.text.to_i, @one) + rc = @ds.info + + raise rc.message.to_s if OpenNebula.is_error?(rc) + else + xml = OpenNebula::XMLElement.build_xml(@options[:ds_xml], 'DATASTORE') + @ds = OpenNebula::Datastore.new(xml, @one) + end + + @mad = self['DS_MAD', ''].upcase + end + + def [](xpath, default = '') + v = @ds[xpath] + + return default if !v || v == '' + + v + end + + # Confine the datastore command. It will try first to use systemd slices + # and then nice/ionice + # @param[String] cmd is the command to execute + # @param[String] vm_dir_path used to set IO limits on the system ds + # block device + def cmd_confinement(cmd, vm_dir, env_a = []) + ccmd = systemd_cmd(cmd, vm_dir, env_a) + + return ccmd if ccmd != cmd + + nice_cmd(cmd) + end + + # Confine the datastore command in a systemd slice. + # @param[String] cmd is the command to execute + # @param[String] vm_dir_path used to set IO limits on the system ds + # block device + # + # The slice can set the following resources: + # - CPUQuota + # - IOReadIOPSMax + # - IOWriteIOPSMax + # + # This requires delegation of io/cpu/cpuset controllers to oneadmin + # vmdir needs to be local (e.g. 
not an NFS volume) + # + # Access to user systemd requires: + # export XDG_RUNTIME_DIR="/run/user/$UID" + # export DBUS_SESSION_BUS_ADDRESS="unix:path=${XDG_RUNTIME_DIR}/bus" + def systemd_cmd(cmd, vm_dir, env_a) + return cmd if @mad.empty? + + riops = Integer(self["TEMPLATE/#{@mad}_MAX_RIOPS", -1]) + wiops = Integer(self["TEMPLATE/#{@mad}_MAX_WIOPS", -1]) + cpuq = Integer(self["TEMPLATE/#{@mad}_CPU_QUOTA", -1]) + + return cmd if riops == -1 && wiops == -1 && cpuq == -1 + + vm_path = Pathname.new(vm_dir) + bpath = vm_path.cleanpath + + env_opts = '' + env_a.each {|e| env_opts += " --setenv=\"#{e}=$#{e}\"" } + + # Create a slice for backup processes (per backup datastore) + spath = '~/.config/systemd/user' + sname = "backup.#{@ds.id}.slice" + + slice =<<~EOS + [Slice] + CPUQuota=#{cpuq == -1? '': "#{cpuq}%"} + IOReadIOPSMax=#{riops == -1? '': "#{bpath} #{riops}"} + IOWriteIOPSMax=#{wiops == -1? '': "#{bpath} #{wiops}"} + EOS + + <<~EOS + mkdir -p #{spath} + + (flock -n #{FD} || exit 1 + echo '#{slice}' > #{spath}/#{sname} + systemctl --user daemon-reload + ) #{FD}> #{spath}/.lock + + #{SYSTEMD_RUN} #{env_opts} --slice=#{sname} #{cmd} + EOS + end + + # Generate a "nice" command + # @param[String] cmd is the command to execute + def nice_cmd(cmd) + return cmd if @mad.empty? 
+ + ionice = Integer(self["TEMPLATE/#{@mad}_IONICE", -1]) + nice = Integer(self["TEMPLATE/#{@mad}_NICE", -1]) + + return cmd if ionice == -1 && nice == -1 + + rcmd = '' + + rcmd << "#{NICE} -n #{nice} " if nice != -1 + rcmd << "#{IONICE} -c2 -n#{ionice} " if ionice != -1 + rcmd << cmd + end + + end + +end diff --git a/src/tm_mad/qcow2/prebackup b/src/tm_mad/qcow2/prebackup index 24382e67cd..f05cdac0c6 100755 --- a/src/tm_mad/qcow2/prebackup +++ b/src/tm_mad/qcow2/prebackup @@ -52,6 +52,7 @@ end $LOAD_PATH << RUBY_LIB_LOCATION require_relative '../lib/tm_action' +require_relative '../lib/datastore' require 'rexml/document' @@ -71,6 +72,10 @@ rdir = dir[1] bck_dir = "#{rdir}/backup" qcow2_util = '/var/tmp/one/tm/lib/backup_qcow2.rb' +qcow2_cmd = "#{qcow2_util} -d \"#{disks}\" -x #{bck_dir}/vm.xml -p #{rdir}" + +ds = TransferManager::Datastore.new(:vm_xml => vm_xml) +cmd = ds.cmd_confinement(qcow2_cmd, rdir) pre_script = <<~EOS set -ex -o pipefail @@ -87,7 +92,7 @@ pre_script = <<~EOS # ------------------- # Convert and flatten # ------------------- - #{qcow2_util} -d "#{disks}" -x #{bck_dir}/vm.xml -p #{rdir} + #{cmd} EOS rc = TransferManager::Action.ssh('prebackup', diff --git a/src/tm_mad/qcow2/prebackup_live b/src/tm_mad/qcow2/prebackup_live index fb09f71f76..3acd64d6fe 100755 --- a/src/tm_mad/qcow2/prebackup_live +++ b/src/tm_mad/qcow2/prebackup_live @@ -55,6 +55,7 @@ require 'rexml/document' require_relative '../lib/tm_action' require_relative '../lib/kvm' +require_relative '../lib/datastore' # TODO: fsfreeze for each hypervisor based on VM_MAD include TransferManager::KVM @@ -77,6 +78,10 @@ tmp_dir = "#{rdir}/tmp" bck_dir = "#{rdir}/backup" qcow2_util = '/var/tmp/one/tm/lib/backup_qcow2.rb' +qcow2_cmd = "#{qcow2_util} -l -d \"#{disks}\" -x #{bck_dir}/vm.xml -p #{rdir}" + +ds = TransferManager::Datastore.new(:vm_xml => vm_xml) +cmd = ds.cmd_confinement(qcow2_cmd, rdir) pre_script = <<~EOS set -ex -o pipefail @@ -97,7 +102,7 @@ pre_script = <<~EOS # 
-------------------------------------- # Create backup live # -------------------------------------- - #{qcow2_util} -l -d "#{disks}" -x #{bck_dir}/vm.xml -p #{rdir} + #{cmd} EOS rc = TransferManager::Action.ssh('prebackup_live',