2022-04-27 11:00:00 +02:00
package PVE::QemuServer ;
2011-08-23 07:47:04 +02:00
use strict ;
2013-10-01 13:14:49 +02:00
use warnings ;
2019-03-21 12:53:45 +01:00
2019-10-18 11:24:21 +02:00
use Cwd 'abs_path' ;
use Digest::SHA ;
use Fcntl ':flock' ;
use Fcntl ;
2011-08-23 07:47:04 +02:00
use File::Basename ;
2019-10-18 11:24:21 +02:00
use File::Copy qw( copy ) ;
2011-08-23 07:47:04 +02:00
use File::Path ;
use File::stat ;
use Getopt::Long ;
2019-10-18 11:24:21 +02:00
use IO::Dir ;
use IO::File ;
use IO::Handle ;
use IO::Select ;
use IO::Socket::UNIX ;
2011-08-23 07:47:04 +02:00
use IPC::Open3 ;
2012-05-29 14:01:50 +02:00
use JSON ;
2023-01-13 13:52:39 +01:00
use List::Util qw( first ) ;
2019-06-11 12:13:52 +02:00
use MIME::Base64 ;
2019-10-18 11:24:21 +02:00
use POSIX ;
use Storable qw( dclone ) ;
2021-10-14 11:28:49 +02:00
use Time::HiRes qw( gettimeofday usleep ) ;
2019-10-18 11:24:21 +02:00
use URI::Escape ;
2019-11-06 13:36:53 +01:00
use UUID ;
2019-10-18 11:24:21 +02:00
2020-04-27 10:24:23 +02:00
use PVE::Cluster qw( cfs_register_file cfs_read_file cfs_write_file ) ;
2020-10-30 10:42:35 +01:00
use PVE::CGroup ;
2022-06-30 17:09:45 -07:00
use PVE::CpuSet ;
2019-11-11 11:28:27 +01:00
use PVE::DataCenterConfig ;
2019-10-18 11:24:21 +02:00
use PVE::Exception qw( raise raise_param_exc ) ;
2021-04-06 19:39:41 +02:00
use PVE::Format qw( render_duration render_bytes ) ;
2020-03-10 14:26:29 +01:00
use PVE::GuestHelpers qw( safe_string_ne safe_num_ne safe_boolean_ne ) ;
2011-08-23 07:47:04 +02:00
use PVE::INotify ;
2020-09-02 14:07:02 +02:00
use PVE::JSONSchema qw( get_standard_option parse_property_string ) ;
2011-08-23 07:47:04 +02:00
use PVE::ProcFSTools ;
2020-12-03 12:43:40 +01:00
use PVE::PBSClient ;
2022-08-31 14:46:09 +02:00
use PVE::RESTEnvironment qw( log_warn ) ;
2012-12-12 15:35:26 +01:00
use PVE::RPCEnvironment ;
2019-10-18 11:24:21 +02:00
use PVE::Storage ;
2018-11-16 16:17:51 +01:00
use PVE::SysFSTools ;
2018-06-15 11:00:53 +02:00
use PVE::Systemd ;
2020-04-27 10:24:23 +02:00
use PVE::Tools qw( run_command file_read_firstline file_get_contents dir_glob_foreach get_host_arch $IPV6RE ) ;
2019-10-18 11:24:21 +02:00
use PVE::QMPClient ;
use PVE::QemuConfig ;
2022-11-10 15:35:53 +01:00
use PVE::QemuServer::Helpers qw( min_version config_aware_timeout windows_version ) ;
2019-10-18 11:24:21 +02:00
use PVE::QemuServer::Cloudinit ;
2020-10-30 10:42:35 +01:00
use PVE::QemuServer::CGroup ;
2020-01-16 16:40:48 +01:00
use PVE::QemuServer::CPUConfig qw( print_cpu_device get_cpu_options ) ;
2021-06-04 11:47:44 +02:00
use PVE::QemuServer::Drive qw( is_valid_drivename drive_is_cloudinit drive_is_cdrom drive_is_read_only parse_drive print_drive ) ;
2019-11-19 12:23:49 +01:00
use PVE::QemuServer::Machine ;
2019-10-18 11:24:21 +02:00
use PVE::QemuServer::Memory ;
2019-11-19 12:23:47 +01:00
use PVE::QemuServer::Monitor qw( mon_cmd ) ;
2020-06-18 16:36:53 +02:00
use PVE::QemuServer::PCI qw( print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci ) ;
2019-10-18 11:24:21 +02:00
use PVE::QemuServer::USB qw( parse_usb_device ) ;
2011-08-23 07:47:04 +02:00
2020-03-09 10:24:21 +01:00
# SDN support is optional: try to load the zones module and remember whether that
# worked. $have_sdn is 1 on success and stays undef when the module cannot be loaded.
my $have_sdn = eval {
    require PVE::Network::SDN::Zones;
    1;
};
2018-03-16 13:58:27 +01:00
# Directory that holds the EDK2 firmware images referenced below.
# NOTE(review): the value already ends with '/', and every entry below adds another '/',
# so the resulting paths contain a double slash.
my $ EDK2_FW_BASE = '/usr/share/pve-edk2-firmware/' ;
2018-11-12 14:10:36 +01:00
# Firmware image lookup table: architecture -> variant -> [ CODE image, VARS image ].
my $ OVMF = {
2021-10-05 18:02:06 +02:00
x86_64 = > {
2021-10-11 14:10:24 +02:00
# '4m-no-smm*' variants pair the plain 4M CODE image with the plain or '.ms' VARS image.
'4m-no-smm' = > [
"$EDK2_FW_BASE/OVMF_CODE_4M.fd" ,
"$EDK2_FW_BASE/OVMF_VARS_4M.fd" ,
] ,
'4m-no-smm-ms' = > [
"$EDK2_FW_BASE/OVMF_CODE_4M.fd" ,
"$EDK2_FW_BASE/OVMF_VARS_4M.ms.fd" ,
] ,
2021-10-05 18:02:06 +02:00
# '4m*' variants use the '.secboot' CODE image instead.
'4m' = > [
"$EDK2_FW_BASE/OVMF_CODE_4M.secboot.fd" ,
"$EDK2_FW_BASE/OVMF_VARS_4M.fd" ,
] ,
'4m-ms' = > [
"$EDK2_FW_BASE/OVMF_CODE_4M.secboot.fd" ,
"$EDK2_FW_BASE/OVMF_VARS_4M.ms.fd" ,
] ,
# Images without the '_4M' suffix.
default = > [
"$EDK2_FW_BASE/OVMF_CODE.fd" ,
"$EDK2_FW_BASE/OVMF_VARS.fd" ,
] ,
} ,
# aarch64 only has a single (default) AAVMF image pair.
aarch64 = > {
default = > [
"$EDK2_FW_BASE/AAVMF_CODE.fd" ,
"$EDK2_FW_BASE/AAVMF_VARS.fd" ,
] ,
} ,
2018-11-12 14:10:36 +01:00
} ;
2016-09-08 11:03:01 +02:00
2011-09-12 11:03:14 +02:00
# Host CPU information, read once when this module is loaded.
my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
2011-08-23 07:47:04 +02:00
2021-10-20 08:24:08 +02:00
# Note about locking: we use flock on the config file to protect against concurrent actions.
# Additionally, we have a 'lock' setting in the config file. This can be set to 'migrate',
# 'backup', 'snapshot' or 'rollback'. Most actions are not allowed when such a lock is set.
# But you can ignore this kind of lock with the --skiplock flag.
2011-08-23 07:47:04 +02:00
2022-11-16 12:08:05 +01:00
# Register this module's parse/write callbacks for the '/qemu-server/' path with
# PVE::Cluster (cfs_register_file is imported from there).
cfs_register_file('/qemu-server/', \&parse_vm_config, \&write_vm_config);
2011-08-23 07:47:04 +02:00
2011-09-14 12:02:08 +02:00
# Standard option 'pve-qm-stateuri': an optional string of at most 128 characters
# naming the location that state is saved to / restored from.
PVE::JSONSchema::register_standard_option('pve-qm-stateuri', {
    description => "Some command save/restore state from this location.",
    type => 'string',
    maxLength => 128,
    optional => 1,
});
2018-09-14 14:08:43 +02:00
PVE::JSONSchema:: register_standard_option ( 'pve-qemu-machine' , {
2022-12-20 10:23:32 +01:00
description = > "Specifies the QEMU machine type." ,
2018-09-14 14:08:43 +02:00
type = > 'string' ,
implement PVE Version addition for QEMU machine
With our QEMU 4.1.1 package we can pass an additional internal version
to QEMU's machine, it will be split out there and ignored, but
returned on a QMP 'query-machines' call.
This allows us to use it for increasing the granularity with which we
can roll-out HW layout changes/additions for VMs. Until now we
required a machine version bump, happening normally every major
release of QEMU, with seldom, for us irrelevant, exceptions.
This often delays rolling out a feature, which would break
live-migration, by several months. That can now be avoided, the new
"pve-version" component of the machine can be bumped at will, and
thus we are much more flexible.
That version orders after the ($major, $minor) version components
from a stable release - it can thus also be reset on the next
release.
The implementation extends the qemu-machine REGEX, remembers
"pve-version" when doing a "query-machines" and integrates support
into the min_version and extract_version helpers.
We start out with a version of 1.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Reviewed-by: Stefan Reiter <s.reiter@proxmox.com>
2019-11-25 11:18:13 +01:00
pattern = > '(pc|pc(-i440fx)?-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|q35|pc-q35-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|virt(?:-\d+(\.\d+)+)?(\+pve\d+)?)' ,
2018-09-14 14:08:43 +02:00
maxLength = > 40 ,
optional = > 1 ,
} ) ;
2022-11-15 07:27:07 +01:00
# FIXME: remove in favor of just using the INotify one, it's cached there exactly the same way
2019-12-10 11:05:39 +01:00
# Cached copy of the local node name; filled in lazily by nodename().
my $nodename_cache;

# Return the local node name as reported by PVE::INotify::nodename().
#
# The lookup happens on the first call only; later calls are served from
# $nodename_cache.
sub nodename {
    $nodename_cache //= PVE::INotify::nodename();
    return $nodename_cache;
}
2011-08-23 07:47:04 +02:00
2016-03-30 12:20:13 +02:00
# Property-string schema for the virtual watchdog device; registered below as the
# 'pve-qm-watchdog' format.
my $watchdog_fmt = {
    # Emulated watchdog hardware.
    model => {
        default_key => 1,
        type => 'string',
        enum => [qw(i6300esb ib700)],
        description => "Watchdog type to emulate.",
        default => 'i6300esb',
        optional => 1,
    },
    # What happens when the guest stops polling the watchdog.
    action => {
        type => 'string',
        enum => [qw(reset shutdown poweroff pause debug none)],
        description => "The action to perform if after activation the guest fails to poll the watchdog in time.",
        optional => 1,
    },
};
# Make the watchdog schema available under the format name 'pve-qm-watchdog'.
PVE::JSONSchema::register_format('pve-qm-watchdog', $watchdog_fmt);
2018-08-01 20:29:04 +02:00
# Property-string schema for the 'agent' VM option (referenced as its format in $confdesc).
my $ agent_fmt = {
# Main switch; marked as the default key of the property string.
enabled = > {
2022-12-20 10:23:32 +01:00
description = > "Enable/disable communication with a QEMU Guest Agent (QGA) running in the VM." ,
2018-08-01 20:29:04 +02:00
type = > 'boolean' ,
default = > 0 ,
default_key = > 1 ,
} ,
# Optional flag, off by default.
fstrim_cloned_disks = > {
2020-12-15 16:22:03 +01:00
description = > "Run fstrim after moving a disk or migrating the VM." ,
2018-08-01 20:29:04 +02:00
type = > 'boolean' ,
optional = > 1 ,
default = > 0
} ,
2019-11-18 17:46:12 +11:00
# Agent type, either 'virtio' (default) or 'isa'.
type = > {
description = > "Select the agent type" ,
type = > 'string' ,
default = > 'virtio' ,
optional = > 1 ,
enum = > [ qw( virtio isa ) ] ,
} ,
2018-08-01 20:29:04 +02:00
} ;
2018-11-09 13:31:09 +01:00
# Property-string schema describing the emulated display: adapter type and VGA memory.
my $ vga_fmt = {
# Adapter type; marked as the default key, defaults to 'std'.
type = > {
description = > "Select the VGA type." ,
type = > 'string' ,
default = > 'std' ,
optional = > 1 ,
default_key = > 1 ,
2022-04-22 14:28:09 +02:00
enum = > [ qw( cirrus qxl qxl2 qxl3 qxl4 none serial0 serial1 serial2 serial3 std virtio virtio-gl vmware ) ] ,
2018-11-09 13:31:09 +01:00
} ,
# VGA memory in MiB, limited to the range 4..512; no default is declared here.
memory = > {
description = > "Sets the VGA memory (in MiB). Has no effect with serial display." ,
type = > 'integer' ,
optional = > 1 ,
minimum = > 4 ,
maximum = > 512 ,
} ,
} ;
2019-02-22 11:38:33 +01:00
# Property-string schema for the ivshmem option: size and optional name of the backing file.
my $ivshmem_fmt = {
    # Size is the only mandatory property (no 'optional' flag).
    size => {
        type => 'integer',
        minimum => 1,
        description => "The size of the file in MB.",
    },
    # Optional file name, restricted to letters, digits and '-'.
    name => {
        type => 'string',
        pattern => '[a-zA-Z0-9\-]+',
        optional => 1,
        format_description => 'string',
        description => "The name of the file. Will be prefixed with 'pve-shm-'. Default is the VMID. Will be deleted when the VM is stopped.",
    },
};
2019-07-19 15:15:44 +02:00
# Property-string schema for an emulated audio device and its backend driver.
my $ audio_fmt = {
# Emulated device model; no 'optional' flag is set for this property.
device = > {
type = > 'string' ,
enum = > [ qw( ich9-intel-hda intel-hda AC97 ) ] ,
description = > "Configure an audio device."
} ,
# Backend driver, 'spice' unless stated otherwise.
driver = > {
type = > 'string' ,
2021-01-07 18:02:43 +01:00
enum = > [ 'spice' , 'none' ] ,
2019-07-19 15:15:44 +02:00
default = > 'spice' ,
optional = > 1 ,
description = > "Driver backend for the audio device."
} ,
} ;
2019-08-22 17:33:18 +02:00
# Property-string schema for optional SPICE extras: folder sharing and video streaming.
my $ spice_enhancements_fmt = {
foldersharing = > {
type = > 'boolean' ,
optional = > 1 ,
2019-09-04 14:44:04 +02:00
# NOTE(review): the default is the string '0' here, while the boolean properties in
# $agent_fmt use the number 0.
default = > '0' ,
2019-08-22 17:33:18 +02:00
description = > "Enable folder sharing via SPICE. Needs Spice-WebDAV daemon installed in the VM."
} ,
# One of 'off' (default), 'all' or 'filter'.
videostreaming = > {
type = > 'string' ,
enum = > [ 'off' , 'all' , 'filter' ] ,
2019-09-04 14:44:04 +02:00
default = > 'off' ,
2019-08-22 17:33:18 +02:00
optional = > 1 ,
description = > "Enable video streaming. Uses compression for detected video streams."
} ,
} ;
fix #2264: add virtio-rng device
Allow a user to add a virtio-rng-pci (an emulated hardware random
number generator) to a VM with the rng0 setting. The setting is
version_guard()-ed.
Limit the selection of entropy source to one of three:
/dev/urandom (preferred): Non-blocking kernel entropy source
/dev/random: Blocking kernel source
/dev/hwrng: Hardware RNG on the host for passthrough
QEMU itself defaults to /dev/urandom (or the equivalent getrandom()
call) if no source file is given, but I don't fully trust that
behaviour to stay constant, considering the documentation [0] already
disagrees with the code [1], so let's always specify the file ourselves.
/dev/urandom is preferred, since it prevents host entropy starvation.
The quality of randomness is still good enough to emulate a hwrng, since
a) it's still seeded from the kernel's true entropy pool periodically
and b) it's mixed with true entropy in the guest as well.
Additionally, all sources about entropy prediction attacks I could find
mention that to predict /dev/urandom results, /dev/random has to be
accessed or manipulated in one way or the other - this is not possible
from a VM however, as the entropy we're talking about comes from the
*hosts* blocking pool.
More about the entropy and security implications of the non-blocking
interface in [2] and [3].
Note further that only one /dev/hwrng exists at any given time, if
multiple RNGs are available, only the one selected in
'/sys/devices/virtual/misc/hw_random/rng_current' will feed the file.
Selecting this is left as an exercise to the user, if at all required.
We limit the available entropy to 1 KiB/s by default, but allow the user
to override this. Interesting to note is that the limiter does not work
linearly, i.e. max_bytes=1024/period=1000 means that up to 1 KiB of data
becomes available on a 1000 millisecond timer, not that 1 KiB is
streamed to the guest over the course of one second - hence the
configurable period.
The default used here is the same as given in the QEMU documentation [0]
and has been verified to affect entropy availability in a guest by
measuring /dev/random throughput. 1 KiB/s is enough to avoid any
early-boot entropy shortages, and already has a significant impact on
/dev/random availability in the guest.
[0] https://wiki.qemu.org/Features/VirtIORNG
[1] https://git.qemu.org/?p=qemu.git;a=blob;f=crypto/random-platform.c;h=f92f96987d7d262047c7604b169a7fdf11236107;hb=HEAD
[2] https://lwn.net/Articles/261804/
[3] https://lwn.net/Articles/808575/
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
2020-02-20 18:10:44 +01:00
my $ rng_fmt = {
source = > {
type = > 'string' ,
enum = > [ '/dev/urandom' , '/dev/random' , '/dev/hwrng' ] ,
default_key = > 1 ,
2021-10-20 08:24:08 +02:00
description = > "The file on the host to gather entropy from. In most cases '/dev/urandom'"
. " should be preferred over '/dev/random' to avoid entropy-starvation issues on the"
. " host. Using urandom does *not* decrease security in any meaningful way, as it's"
. " still seeded from real entropy, and the bytes provided will most likely be mixed"
. " with real entropy on the guest as well. '/dev/hwrng' can be used to pass through"
. " a hardware RNG from the host." ,
fix #2264: add virtio-rng device
Allow a user to add a virtio-rng-pci (an emulated hardware random
number generator) to a VM with the rng0 setting. The setting is
version_guard()-ed.
Limit the selection of entropy source to one of three:
/dev/urandom (preferred): Non-blocking kernel entropy source
/dev/random: Blocking kernel source
/dev/hwrng: Hardware RNG on the host for passthrough
QEMU itself defaults to /dev/urandom (or the equivalent getrandom()
call) if no source file is given, but I don't fully trust that
behaviour to stay constant, considering the documentation [0] already
disagrees with the code [1], so let's always specify the file ourselves.
/dev/urandom is preferred, since it prevents host entropy starvation.
The quality of randomness is still good enough to emulate a hwrng, since
a) it's still seeded from the kernel's true entropy pool periodically
and b) it's mixed with true entropy in the guest as well.
Additionally, all sources about entropy prediction attacks I could find
mention that to predict /dev/urandom results, /dev/random has to be
accessed or manipulated in one way or the other - this is not possible
from a VM however, as the entropy we're talking about comes from the
*hosts* blocking pool.
More about the entropy and security implications of the non-blocking
interface in [2] and [3].
Note further that only one /dev/hwrng exists at any given time, if
multiple RNGs are available, only the one selected in
'/sys/devices/virtual/misc/hw_random/rng_current' will feed the file.
Selecting this is left as an exercise to the user, if at all required.
We limit the available entropy to 1 KiB/s by default, but allow the user
to override this. Interesting to note is that the limiter does not work
linearly, i.e. max_bytes=1024/period=1000 means that up to 1 KiB of data
becomes available on a 1000 millisecond timer, not that 1 KiB is
streamed to the guest over the course of one second - hence the
configurable period.
The default used here is the same as given in the QEMU documentation [0]
and has been verified to affect entropy availability in a guest by
measuring /dev/random throughput. 1 KiB/s is enough to avoid any
early-boot entropy shortages, and already has a significant impact on
/dev/random availability in the guest.
[0] https://wiki.qemu.org/Features/VirtIORNG
[1] https://git.qemu.org/?p=qemu.git;a=blob;f=crypto/random-platform.c;h=f92f96987d7d262047c7604b169a7fdf11236107;hb=HEAD
[2] https://lwn.net/Articles/261804/
[3] https://lwn.net/Articles/808575/
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
2020-02-20 18:10:44 +01:00
} ,
max_bytes = > {
type = > 'integer' ,
2021-10-20 08:24:08 +02:00
description = > "Maximum bytes of entropy allowed to get injected into the guest every"
. " 'period' milliseconds. Prefer a lower value when using '/dev/random' as source. Use"
. " `0` to disable limiting (potentially dangerous!)." ,
fix #2264: add virtio-rng device
Allow a user to add a virtio-rng-pci (an emulated hardware random
number generator) to a VM with the rng0 setting. The setting is
version_guard()-ed.
Limit the selection of entropy source to one of three:
/dev/urandom (preferred): Non-blocking kernel entropy source
/dev/random: Blocking kernel source
/dev/hwrng: Hardware RNG on the host for passthrough
QEMU itself defaults to /dev/urandom (or the equivalent getrandom()
call) if no source file is given, but I don't fully trust that
behaviour to stay constant, considering the documentation [0] already
disagrees with the code [1], so let's always specify the file ourselves.
/dev/urandom is preferred, since it prevents host entropy starvation.
The quality of randomness is still good enough to emulate a hwrng, since
a) it's still seeded from the kernel's true entropy pool periodically
and b) it's mixed with true entropy in the guest as well.
Additionally, all sources about entropy prediction attacks I could find
mention that to predict /dev/urandom results, /dev/random has to be
accessed or manipulated in one way or the other - this is not possible
from a VM however, as the entropy we're talking about comes from the
*hosts* blocking pool.
More about the entropy and security implications of the non-blocking
interface in [2] and [3].
Note further that only one /dev/hwrng exists at any given time, if
multiple RNGs are available, only the one selected in
'/sys/devices/virtual/misc/hw_random/rng_current' will feed the file.
Selecting this is left as an exercise to the user, if at all required.
We limit the available entropy to 1 KiB/s by default, but allow the user
to override this. Interesting to note is that the limiter does not work
linearly, i.e. max_bytes=1024/period=1000 means that up to 1 KiB of data
becomes available on a 1000 millisecond timer, not that 1 KiB is
streamed to the guest over the course of one second - hence the
configurable period.
The default used here is the same as given in the QEMU documentation [0]
and has been verified to affect entropy availability in a guest by
measuring /dev/random throughput. 1 KiB/s is enough to avoid any
early-boot entropy shortages, and already has a significant impact on
/dev/random availability in the guest.
[0] https://wiki.qemu.org/Features/VirtIORNG
[1] https://git.qemu.org/?p=qemu.git;a=blob;f=crypto/random-platform.c;h=f92f96987d7d262047c7604b169a7fdf11236107;hb=HEAD
[2] https://lwn.net/Articles/261804/
[3] https://lwn.net/Articles/808575/
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
2020-02-20 18:10:44 +01:00
optional = > 1 ,
2021-10-20 08:24:08 +02:00
# default is 1 KiB/s, provides enough entropy to the guest to avoid boot-starvation issues
# (e.g. systemd etc...) while allowing no chance of overwhelming the host, provided we're
# reading from /dev/urandom
fix #2264: add virtio-rng device
Allow a user to add a virtio-rng-pci (an emulated hardware random
number generator) to a VM with the rng0 setting. The setting is
version_guard()-ed.
Limit the selection of entropy source to one of three:
/dev/urandom (preferred): Non-blocking kernel entropy source
/dev/random: Blocking kernel source
/dev/hwrng: Hardware RNG on the host for passthrough
QEMU itself defaults to /dev/urandom (or the equivalent getrandom()
call) if no source file is given, but I don't fully trust that
behaviour to stay constant, considering the documentation [0] already
disagrees with the code [1], so let's always specify the file ourselves.
/dev/urandom is preferred, since it prevents host entropy starvation.
The quality of randomness is still good enough to emulate a hwrng, since
a) it's still seeded from the kernel's true entropy pool periodically
and b) it's mixed with true entropy in the guest as well.
Additionally, all sources about entropy prediction attacks I could find
mention that to predict /dev/urandom results, /dev/random has to be
accessed or manipulated in one way or the other - this is not possible
from a VM however, as the entropy we're talking about comes from the
*hosts* blocking pool.
More about the entropy and security implications of the non-blocking
interface in [2] and [3].
Note further that only one /dev/hwrng exists at any given time, if
multiple RNGs are available, only the one selected in
'/sys/devices/virtual/misc/hw_random/rng_current' will feed the file.
Selecting this is left as an exercise to the user, if at all required.
We limit the available entropy to 1 KiB/s by default, but allow the user
to override this. Interesting to note is that the limiter does not work
linearly, i.e. max_bytes=1024/period=1000 means that up to 1 KiB of data
becomes available on a 1000 millisecond timer, not that 1 KiB is
streamed to the guest over the course of one second - hence the
configurable period.
The default used here is the same as given in the QEMU documentation [0]
and has been verified to affect entropy availability in a guest by
measuring /dev/random throughput. 1 KiB/s is enough to avoid any
early-boot entropy shortages, and already has a significant impact on
/dev/random availability in the guest.
[0] https://wiki.qemu.org/Features/VirtIORNG
[1] https://git.qemu.org/?p=qemu.git;a=blob;f=crypto/random-platform.c;h=f92f96987d7d262047c7604b169a7fdf11236107;hb=HEAD
[2] https://lwn.net/Articles/261804/
[3] https://lwn.net/Articles/808575/
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
2020-02-20 18:10:44 +01:00
default = > 1024 ,
} ,
period = > {
type = > 'integer' ,
2021-10-20 08:24:08 +02:00
description = > "Every 'period' milliseconds the entropy-injection quota is reset, allowing"
. " the guest to retrieve another 'max_bytes' of entropy." ,
fix #2264: add virtio-rng device
Allow a user to add a virtio-rng-pci (an emulated hardware random
number generator) to a VM with the rng0 setting. The setting is
version_guard()-ed.
Limit the selection of entropy source to one of three:
/dev/urandom (preferred): Non-blocking kernel entropy source
/dev/random: Blocking kernel source
/dev/hwrng: Hardware RNG on the host for passthrough
QEMU itself defaults to /dev/urandom (or the equivalent getrandom()
call) if no source file is given, but I don't fully trust that
behaviour to stay constant, considering the documentation [0] already
disagrees with the code [1], so let's always specify the file ourselves.
/dev/urandom is preferred, since it prevents host entropy starvation.
The quality of randomness is still good enough to emulate a hwrng, since
a) it's still seeded from the kernel's true entropy pool periodically
and b) it's mixed with true entropy in the guest as well.
Additionally, all sources about entropy prediction attacks I could find
mention that to predict /dev/urandom results, /dev/random has to be
accessed or manipulated in one way or the other - this is not possible
from a VM however, as the entropy we're talking about comes from the
*hosts* blocking pool.
More about the entropy and security implications of the non-blocking
interface in [2] and [3].
Note further that only one /dev/hwrng exists at any given time, if
multiple RNGs are available, only the one selected in
'/sys/devices/virtual/misc/hw_random/rng_current' will feed the file.
Selecting this is left as an exercise to the user, if at all required.
We limit the available entropy to 1 KiB/s by default, but allow the user
to override this. Interesting to note is that the limiter does not work
linearly, i.e. max_bytes=1024/period=1000 means that up to 1 KiB of data
becomes available on a 1000 millisecond timer, not that 1 KiB is
streamed to the guest over the course of one second - hence the
configurable period.
The default used here is the same as given in the QEMU documentation [0]
and has been verified to affect entropy availability in a guest by
measuring /dev/random throughput. 1 KiB/s is enough to avoid any
early-boot entropy shortages, and already has a significant impact on
/dev/random availability in the guest.
[0] https://wiki.qemu.org/Features/VirtIORNG
[1] https://git.qemu.org/?p=qemu.git;a=blob;f=crypto/random-platform.c;h=f92f96987d7d262047c7604b169a7fdf11236107;hb=HEAD
[2] https://lwn.net/Articles/261804/
[3] https://lwn.net/Articles/808575/
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
2020-02-20 18:10:44 +01:00
optional = > 1 ,
default = > 1000 ,
} ,
} ;
2021-10-21 09:10:49 +02:00
# Property-string schema for guest meta information; both properties are optional.
my $ meta_info_fmt = {
# Creation time as a non-negative integer (UNIX epoch).
'ctime' = > {
type = > 'integer' ,
description = > "The guest creation timestamp as UNIX epoch time" ,
minimum = > 0 ,
optional = > 1 ,
} ,
2021-10-21 09:19:54 +02:00
# Dotted numeric version string with at least two components, e.g. '7.1'.
'creation-qemu' = > {
type = > 'string' ,
description = > "The QEMU (machine) version from the time this VM was created." ,
pattern = > '\d+(\.\d+)+' ,
optional = > 1 ,
} ,
2021-10-21 09:10:49 +02:00
} ;
2011-08-23 07:47:04 +02:00
my $ confdesc = {
onboot = > {
optional = > 1 ,
type = > 'boolean' ,
description = > "Specifies whether a VM will be started during system bootup." ,
default = > 0 ,
} ,
autostart = > {
optional = > 1 ,
type = > 'boolean' ,
description = > "Automatic restart after crash (currently ignored)." ,
default = > 0 ,
} ,
2011-10-10 16:46:53 +02:00
hotplug = > {
2021-10-20 12:56:57 +02:00
optional = > 1 ,
type = > 'string' , format = > 'pve-hotplug-features' ,
description = > "Selectively enable hotplug features. This is a comma separated list of"
2022-06-22 13:52:05 +02:00
. " hotplug features: 'network', 'disk', 'cpu', 'memory', 'usb' and 'cloudinit'. Use '0' to disable"
2022-11-10 15:35:58 +01:00
. " hotplug completely. Using '1' as value is an alias for the default `network,disk,usb`."
. " USB hotplugging is possible for guests with machine version >= 7.1 and ostype l26 or"
. " windows > 7." ,
2015-01-27 07:16:22 +01:00
default = > 'network,disk,usb' ,
2011-10-10 16:46:53 +02:00
} ,
2011-08-23 07:47:04 +02:00
reboot = > {
optional = > 1 ,
type = > 'boolean' ,
description = > "Allow reboot. If set to '0' the VM exit on reboot." ,
default = > 1 ,
} ,
lock = > {
optional = > 1 ,
type = > 'string' ,
description = > "Lock/unlock the VM." ,
2019-03-14 17:04:47 +01:00
enum = > [ qw( backup clone create migrate rollback snapshot snapshot-delete suspending suspended ) ] ,
2011-08-23 07:47:04 +02:00
} ,
cpulimit = > {
optional = > 1 ,
2015-06-02 16:03:25 +02:00
type = > 'number' ,
2016-05-19 13:13:25 +02:00
description = > "Limit of CPU usage." ,
2021-10-20 08:24:08 +02:00
verbose_description = > "Limit of CPU usage.\n\nNOTE: If the computer has 2 CPUs, it has"
. " total of '2' CPU time. Value '0' indicates no CPU limit." ,
2011-08-23 07:47:04 +02:00
minimum = > 0 ,
2015-06-02 16:03:25 +02:00
maximum = > 128 ,
2021-10-20 12:56:57 +02:00
default = > 0 ,
2011-08-23 07:47:04 +02:00
} ,
cpuunits = > {
optional = > 1 ,
type = > 'integer' ,
2021-10-20 12:56:57 +02:00
description = > "CPU weight for a VM, will be clamped to [1, 10000] in cgroup v2." ,
2021-07-07 10:27:05 +02:00
verbose_description = > "CPU weight for a VM. Argument is used in the kernel fair scheduler."
. " The larger the number is, the more CPU time this VM gets. Number is relative to"
. " weights of all the other running VMs." ,
2022-02-11 09:29:50 +01:00
minimum = > 1 ,
2017-10-09 13:40:23 +02:00
maximum = > 262144 ,
2021-07-07 10:27:05 +02:00
default = > 'cgroup v1: 1024, cgroup v2: 100' ,
2011-08-23 07:47:04 +02:00
} ,
memory = > {
optional = > 1 ,
type = > 'integer' ,
2023-01-23 10:46:38 +01:00
description = > "Amount of RAM for the VM in MiB. This is the maximum available memory when"
2021-10-20 08:24:08 +02:00
. " you use the balloon device." ,
2011-08-23 07:47:04 +02:00
minimum = > 16 ,
default = > 512 ,
} ,
2011-09-16 04:46:26 +02:00
balloon = > {
2021-10-20 12:56:57 +02:00
optional = > 1 ,
type = > 'integer' ,
2023-01-23 10:46:38 +01:00
description = > "Amount of target RAM for the VM in MiB. Using zero disables the ballon driver." ,
2012-12-19 07:24:39 +01:00
minimum = > 0 ,
} ,
shares = > {
2021-10-20 12:56:57 +02:00
optional = > 1 ,
type = > 'integer' ,
description = > "Amount of memory shares for auto-ballooning. The larger the number is, the"
2021-10-20 08:24:08 +02:00
. " more memory this VM gets. Number is relative to weights of all other running VMs."
. " Using zero disables auto-ballooning. Auto-ballooning is done by pvestatd." ,
2012-12-19 07:24:39 +01:00
minimum = > 0 ,
maximum = > 50000 ,
default = > 1000 ,
2011-09-16 04:46:26 +02:00
} ,
2011-08-23 07:47:04 +02:00
keyboard = > {
optional = > 1 ,
type = > 'string' ,
2022-04-25 17:21:38 +02:00
description = > "Keyboard layout for VNC server. This option is generally not required and"
. " is often better handled from within the guest OS." ,
2012-01-09 11:25:25 +01:00
enum = > PVE::Tools:: kvmkeymaplist ( ) ,
2017-11-07 11:09:30 +01:00
default = > undef ,
2011-08-23 07:47:04 +02:00
} ,
name = > {
optional = > 1 ,
2012-03-13 07:00:27 +01:00
type = > 'string' , format = > 'dns-name' ,
2011-08-23 07:47:04 +02:00
description = > "Set a name for the VM. Only used on the configuration web interface." ,
} ,
2012-07-30 14:58:40 +02:00
scsihw = > {
optional = > 1 ,
type = > 'string' ,
2016-05-19 13:13:25 +02:00
description = > "SCSI controller model" ,
2015-03-27 03:41:52 +01:00
enum = > [ qw( lsi lsi53c810 virtio-scsi-pci virtio-scsi-single megasas pvscsi ) ] ,
2012-07-30 14:58:40 +02:00
default = > 'lsi' ,
} ,
2011-08-23 07:47:04 +02:00
description = > {
optional = > 1 ,
type = > 'string' ,
2021-06-18 13:11:07 +02:00
description = > "Description for the VM. Shown in the web-interface VM's summary."
. " This is saved as comment inside the configuration file." ,
maxLength = > 1024 * 8 ,
2011-08-23 07:47:04 +02:00
} ,
ostype = > {
optional = > 1 ,
type = > 'string' ,
2021-10-20 12:56:57 +02:00
enum = > [ qw( other wxp w2k w2k3 w2k8 wvista win7 win8 win10 win11 l24 l26 solaris ) ] ,
2016-05-19 13:13:25 +02:00
description = > "Specify guest operating system." ,
verbose_description = > << EODESC ,
Specify guest operating system . This is used to enable special
optimization / features for specific operating systems:
[ horizontal ]
other ; ; unspecified OS
wxp ; ; Microsoft Windows XP
w2k ; ; Microsoft Windows 2000
w2k3 ; ; Microsoft Windows 2003
w2k8 ; ; Microsoft Windows 2008
wvista ; ; Microsoft Windows Vista
win7 ; ; Microsoft Windows 7
2017-11-07 08:27:37 +01:00
win8 ; ; Microsoft Windows 8 /2012/ 2012 r2
2021-03-05 20:45:19 +01:00
win10 ; ; Microsoft Windows 10 /2016/ 2019
2021-10-11 14:55:18 +02:00
win11 ; ; Microsoft Windows 11 / 2022
2016-05-19 13:13:25 +02:00
l24 ; ; Linux 2.4 Kernel
2023-01-30 11:02:53 +01:00
l26 ; ; Linux 2.6 - 6 . X Kernel
2016-05-19 13:13:25 +02:00
solaris ; ; Solaris /OpenSolaris/ OpenIndiania kernel
2011-08-23 07:47:04 +02:00
EODESC
} ,
boot = > {
optional = > 1 ,
2020-10-06 15:32:15 +02:00
type = > 'string' , format = > 'pve-qm-boot' ,
2021-10-20 12:56:57 +02:00
description = > "Specify guest boot order. Use the 'order=' sub-property as usage with no"
. " key or 'legacy=' is deprecated." ,
2011-08-23 07:47:04 +02:00
} ,
bootdisk = > {
optional = > 1 ,
type = > 'string' , format = > 'pve-qm-bootdisk' ,
2020-10-06 15:32:15 +02:00
description = > "Enable booting from specified disk. Deprecated: Use 'boot: order=foo;bar' instead." ,
2012-08-29 09:19:21 +02:00
pattern = > '(ide|sata|scsi|virtio)\d+' ,
2011-08-23 07:47:04 +02:00
} ,
smp = > {
optional = > 1 ,
type = > 'integer' ,
description = > "The number of CPUs. Please use option -sockets instead." ,
minimum = > 1 ,
default = > 1 ,
} ,
sockets = > {
optional = > 1 ,
type = > 'integer' ,
description = > "The number of CPU sockets." ,
minimum = > 1 ,
default = > 1 ,
} ,
cores = > {
optional = > 1 ,
type = > 'integer' ,
description = > "The number of cores per socket." ,
minimum = > 1 ,
default = > 1 ,
} ,
add numa options v3
This enables NUMA support inside the guest, and shares the memory and cores across the sockets' NUMA nodes.
numa: 0|1
example:
-------
sockets:2
cores:2
memory:4096
numa: 1
qemu command line
-----------------
-object memory-backend-ram,size=2048,id=ram-node0
-numa node,nodeid=0,cpus=0-1,memdev=ram-node0
-object memory-backend-ram,size=2048,id=ram-node1
-numa node,nodeid=1,cpus=2-3,memdev=ram-node1
Signed-off-by: Alexandre Derumier <aderumier@odiso.com>
2014-12-03 16:23:47 +01:00
numa = > {
optional = > 1 ,
type = > 'boolean' ,
2016-03-16 14:53:07 +01:00
description = > "Enable/disable NUMA." ,
add numa options v3
This enables numa support inside the guest, and shares the memory and cores across the sockets numa nodes.
numa: 0|1
example:
-------
sockets:2
cores:2
memory:4096
numa: 1
qemu command line
-----------------
-object memory-backend-ram,size=2048,id=ram-node0
-numa node,nodeid=0,cpus=0-1,memdev=ram-node0
-object memory-backend-ram,size=2048,id=ram-node1
-numa node,nodeid=1,cpus=2-3,memdev=ram-node1
Signed-off-by: Alexandre Derumier <aderumier@odiso.com>
2014-12-03 16:23:47 +01:00
default = > 0 ,
} ,
2016-06-15 05:04:02 +02:00
hugepages = > {
optional = > 1 ,
type = > 'string' ,
description = > "Enable/disable hugepages memory." ,
enum = > [ qw( any 2 1024 ) ] ,
} ,
2020-09-02 11:03:37 +02:00
keephugepages = > {
optional = > 1 ,
type = > 'boolean' ,
default = > 0 ,
2020-09-02 14:07:02 +02:00
description = > "Use together with hugepages. If enabled, hugepages will not not be deleted"
. " after VM shutdown and can be used for subsequent starts." ,
2020-09-02 11:03:37 +02:00
} ,
2015-01-09 16:30:35 +01:00
vcpus = > {
2014-01-07 13:32:50 +01:00
optional = > 1 ,
type = > 'integer' ,
2015-01-09 16:30:35 +01:00
description = > "Number of hotplugged vcpus." ,
2014-01-07 13:32:50 +01:00
minimum = > 1 ,
2015-01-09 16:30:35 +01:00
default = > 0 ,
2014-01-07 13:32:50 +01:00
} ,
2011-08-23 07:47:04 +02:00
acpi = > {
optional = > 1 ,
type = > 'boolean' ,
description = > "Enable/disable ACPI." ,
default = > 1 ,
} ,
2012-09-04 06:31:44 +02:00
agent = > {
2012-09-03 09:51:08 +02:00
optional = > 1 ,
2022-12-20 10:23:32 +01:00
description = > "Enable/disable communication with the QEMU Guest Agent and its properties." ,
2018-08-01 20:29:04 +02:00
type = > 'string' ,
format = > $ agent_fmt ,
2012-09-03 09:51:08 +02:00
} ,
2011-08-23 07:47:04 +02:00
kvm = > {
optional = > 1 ,
type = > 'boolean' ,
description = > "Enable/disable KVM hardware virtualization." ,
default = > 1 ,
} ,
tdf = > {
optional = > 1 ,
type = > 'boolean' ,
2012-09-26 12:42:03 +02:00
description = > "Enable/disable time drift fix." ,
default = > 0 ,
2011-08-23 07:47:04 +02:00
} ,
2011-09-12 12:26:00 +02:00
localtime = > {
2011-08-23 07:47:04 +02:00
optional = > 1 ,
type = > 'boolean' ,
2021-10-20 08:24:08 +02:00
description = > "Set the real time clock (RTC) to local time. This is enabled by default if"
. " the `ostype` indicates a Microsoft Windows OS." ,
2011-08-23 07:47:04 +02:00
} ,
freeze = > {
optional = > 1 ,
type = > 'boolean' ,
description = > "Freeze CPU at startup (use 'c' monitor command to start execution)." ,
} ,
vga = > {
optional = > 1 ,
2018-11-09 13:31:09 +01:00
type = > 'string' , format = > $ vga_fmt ,
description = > "Configure the VGA hardware." ,
2020-09-02 14:07:02 +02:00
verbose_description = > "Configure the VGA Hardware. If you want to use high resolution"
. " modes (>= 1280x1024x16) you may need to increase the vga memory option. Since QEMU"
. " 2.9 the default VGA display type is 'std' for all OS types besides some Windows"
. " versions (XP and older) which use 'cirrus'. The 'qxl' option enables the SPICE"
. " display server. For win* OS you can select how many independent displays you want,"
. " Linux guests can add displays them self.\nYou can also run without any graphic card,"
. " using a serial device as terminal." ,
2011-08-23 07:47:04 +02:00
} ,
2011-09-08 11:39:56 +02:00
watchdog = > {
optional = > 1 ,
type = > 'string' , format = > 'pve-qm-watchdog' ,
2016-05-19 13:13:25 +02:00
description = > "Create a virtual hardware watchdog device." ,
2020-09-02 14:07:02 +02:00
verbose_description = > "Create a virtual hardware watchdog device. Once enabled (by a guest"
. " action), the watchdog must be periodically polled by an agent inside the guest or"
. " else the watchdog will reset the guest (or execute the respective action specified)" ,
2011-09-08 11:39:56 +02:00
} ,
2011-08-23 07:47:04 +02:00
startdate = > {
optional = > 1 ,
2011-09-12 12:26:00 +02:00
type = > 'string' ,
2011-08-23 07:47:04 +02:00
typetext = > "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)" ,
2020-09-02 14:07:02 +02:00
description = > "Set the initial date of the real time clock. Valid format for date are:"
. "'now' or '2006-06-17T16:01:21' or '2006-06-17'." ,
2011-08-23 07:47:04 +02:00
pattern = > '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)' ,
default = > 'now' ,
} ,
2015-04-22 10:02:33 +02:00
startup = > get_standard_option ( 'pve-startup-order' ) ,
2013-04-18 17:05:29 +02:00
template = > {
optional = > 1 ,
type = > 'boolean' ,
description = > "Enable/disable Template." ,
default = > 0 ,
} ,
2011-08-23 07:47:04 +02:00
args = > {
optional = > 1 ,
type = > 'string' ,
2016-05-19 13:13:25 +02:00
description = > "Arbitrary arguments passed to kvm." ,
verbose_description = > << EODESCR ,
2016-04-15 13:12:00 +02:00
Arbitrary arguments passed to kvm , for example:
2011-08-23 07:47:04 +02:00
args: - no - reboot - no - hpet
2016-04-15 13:12:00 +02:00
NOTE: this option is for experts only .
2011-08-23 07:47:04 +02:00
EODESCR
} ,
tablet = > {
optional = > 1 ,
type = > 'boolean' ,
default = > 1 ,
2016-05-19 13:13:25 +02:00
description = > "Enable/disable the USB tablet device." ,
2020-09-02 14:07:02 +02:00
verbose_description = > "Enable/disable the USB tablet device. This device is usually needed"
. " to allow absolute mouse positioning with VNC. Else the mouse runs out of sync with"
. " normal VNC clients. If you're running lots of console-only guests on one host, you"
. " may consider disabling this to save some context switches. This is turned off by"
. " default if you use spice (`qm set <vmid> --vga qxl`)." ,
2011-08-23 07:47:04 +02:00
} ,
migrate_speed = > {
optional = > 1 ,
type = > 'integer' ,
description = > "Set maximum speed (in MB/s) for migrations. Value 0 is no limit." ,
minimum = > 0 ,
default = > 0 ,
} ,
migrate_downtime = > {
optional = > 1 ,
2012-12-30 19:03:00 +01:00
type = > 'number' ,
2011-08-23 07:47:04 +02:00
description = > "Set maximum tolerated downtime (in seconds) for migrations." ,
minimum = > 0 ,
2012-12-30 19:03:00 +01:00
default = > 0.1 ,
2011-08-23 07:47:04 +02:00
} ,
cdrom = > {
optional = > 1 ,
2016-10-18 10:38:58 +02:00
type = > 'string' , format = > 'pve-qm-ide' ,
2016-11-05 15:56:12 +01:00
typetext = > '<volume>' ,
2011-08-23 07:47:04 +02:00
description = > "This is an alias for option -ide2" ,
} ,
cpu = > {
optional = > 1 ,
description = > "Emulated CPU type." ,
type = > 'string' ,
2020-02-12 16:11:23 +01:00
format = > 'pve-vm-cpu-conf' ,
2011-08-23 07:47:04 +02:00
} ,
2012-09-10 10:15:14 +02:00
parent = > get_standard_option ( 'pve-snapshot-name' , {
optional = > 1 ,
description = > "Parent snapshot name. This is used internally, and should not be modified." ,
} ) ,
2012-09-11 08:45:39 +02:00
snaptime = > {
optional = > 1 ,
description = > "Timestamp for snapshots." ,
type = > 'integer' ,
minimum = > 0 ,
} ,
2012-09-12 11:59:48 +02:00
vmstate = > {
optional = > 1 ,
type = > 'string' , format = > 'pve-volume-id' ,
2020-09-02 14:07:02 +02:00
description = > "Reference to a volume which stores the VM state. This is used internally"
. " for snapshots." ,
2012-09-12 11:59:48 +02:00
} ,
2017-05-15 14:11:56 +02:00
vmstatestorage = > get_standard_option ( 'pve-storage-id' , {
description = > "Default storage for VM state volumes/files." ,
optional = > 1 ,
} ) ,
2018-09-14 14:08:43 +02:00
runningmachine = > get_standard_option ( 'pve-qemu-machine' , {
2020-09-02 14:07:02 +02:00
description = > "Specifies the QEMU machine type of the running vm. This is used internally"
. " for snapshots." ,
2018-09-14 14:08:43 +02:00
} ) ,
2020-04-07 15:56:16 +02:00
runningcpu = > {
2020-09-02 14:07:02 +02:00
description = > "Specifies the QEMU '-cpu' parameter of the running vm. This is used"
. " internally for snapshots." ,
2020-04-07 15:56:16 +02:00
optional = > 1 ,
type = > 'string' ,
pattern = > $ PVE:: QemuServer:: CPUConfig:: qemu_cmdline_cpu_re ,
format_description = > 'QEMU -cpu parameter'
} ,
2018-09-14 14:08:43 +02:00
machine = > get_standard_option ( 'pve-qemu-machine' ) ,
2018-11-12 14:10:34 +01:00
arch = > {
description = > "Virtual processor architecture. Defaults to the host." ,
optional = > 1 ,
type = > 'string' ,
enum = > [ qw( x86_64 aarch64 ) ] ,
} ,
2014-06-26 11:12:25 +02:00
smbios1 = > {
description = > "Specify SMBIOS type 1 fields." ,
type = > 'string' , format = > 'pve-qm-smbios1' ,
2019-06-11 17:39:20 +02:00
maxLength = > 512 ,
2014-06-26 11:12:25 +02:00
optional = > 1 ,
} ,
2015-09-03 15:35:37 +02:00
protection = > {
optional = > 1 ,
type = > 'boolean' ,
2020-09-02 14:07:02 +02:00
description = > "Sets the protection flag of the VM. This will disable the remove VM and"
. " remove disk operations." ,
2015-09-03 15:35:37 +02:00
default = > 0 ,
} ,
2015-12-10 10:48:04 +01:00
bios = > {
2015-11-21 08:48:59 +01:00
optional = > 1 ,
2015-12-10 10:48:04 +01:00
type = > 'string' ,
enum = > [ qw( seabios ovmf ) ] ,
description = > "Select BIOS implementation." ,
default = > 'seabios' ,
2015-11-21 08:48:59 +01:00
} ,
2018-09-19 11:35:11 +02:00
vmgenid = > {
type = > 'string' ,
pattern = > '(?:[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}|[01])' ,
format_description = > 'UUID' ,
2020-09-02 14:07:02 +02:00
description = > "Set VM Generation ID. Use '1' to autogenerate on create or update, pass '0'"
. " to disable explicitly." ,
verbose_description = > "The VM generation ID (vmgenid) device exposes a 128-bit integer"
. " value identifier to the guest OS. This allows to notify the guest operating system"
. " when the virtual machine is executed with a different configuration (e.g. snapshot"
. " execution or creation from a template). The guest operating system notices the"
. " change, and is then able to react as appropriate by marking its copies of"
. " distributed databases as dirty, re-initializing its random number generator, etc.\n"
. "Note that auto-creation only works when done through API/CLI create or update methods"
. ", but not when manually editing the config file." ,
2018-09-19 13:33:40 +02:00
default = > "1 (autogenerated)" ,
2018-09-19 11:35:11 +02:00
optional = > 1 ,
} ,
2019-01-31 14:33:39 +01:00
hookscript = > {
type = > 'string' ,
format = > 'pve-volume-id' ,
optional = > 1 ,
description = > "Script that will be executed during various steps in the vms lifetime." ,
} ,
2019-02-22 11:38:33 +01:00
ivshmem = > {
type = > 'string' ,
format = > $ ivshmem_fmt ,
2020-09-02 14:07:02 +02:00
description = > "Inter-VM shared memory. Useful for direct communication between VMs, or to"
. " the host." ,
2019-02-22 11:38:33 +01:00
optional = > 1 ,
2019-07-17 15:58:57 +02:00
} ,
audio0 = > {
type = > 'string' ,
2019-07-19 15:15:44 +02:00
format = > $ audio_fmt ,
2019-07-18 09:08:40 +02:00
description = > "Configure a audio device, useful in combination with QXL/Spice." ,
2019-07-17 15:58:57 +02:00
optional = > 1
} ,
2019-08-22 17:33:18 +02:00
spice_enhancements = > {
type = > 'string' ,
format = > $ spice_enhancements_fmt ,
description = > "Configure additional enhancements for SPICE." ,
optional = > 1
} ,
2019-10-31 13:36:25 +01:00
tags = > {
type = > 'string' , format = > 'pve-tag-list' ,
description = > 'Tags of the VM. This is only meta information.' ,
optional = > 1 ,
} ,
fix #2264: add virtio-rng device
Allow a user to add a virtio-rng-pci (an emulated hardware random
number generator) to a VM with the rng0 setting. The setting is
version_guard()-ed.
Limit the selection of entropy source to one of three:
/dev/urandom (preferred): Non-blocking kernel entropy source
/dev/random: Blocking kernel source
/dev/hwrng: Hardware RNG on the host for passthrough
QEMU itself defaults to /dev/urandom (or the equivalent getrandom()
call) if no source file is given, but I don't fully trust that
behaviour to stay constant, considering the documentation [0] already
disagrees with the code [1], so let's always specify the file ourselves.
/dev/urandom is preferred, since it prevents host entropy starvation.
The quality of randomness is still good enough to emulate a hwrng, since
a) it's still seeded from the kernel's true entropy pool periodically
and b) it's mixed with true entropy in the guest as well.
Additionally, all sources about entropy prediction attacks I could find
mention that to predict /dev/urandom results, /dev/random has to be
accessed or manipulated in one way or the other - this is not possible
from a VM however, as the entropy we're talking about comes from the
*hosts* blocking pool.
More about the entropy and security implications of the non-blocking
interface in [2] and [3].
Note further that only one /dev/hwrng exists at any given time, if
multiple RNGs are available, only the one selected in
'/sys/devices/virtual/misc/hw_random/rng_current' will feed the file.
Selecting this is left as an exercise to the user, if at all required.
We limit the available entropy to 1 KiB/s by default, but allow the user
to override this. Interesting to note is that the limiter does not work
linearly, i.e. max_bytes=1024/period=1000 means that up to 1 KiB of data
becomes available on a 1000 millisecond timer, not that 1 KiB is
streamed to the guest over the course of one second - hence the
configurable period.
The default used here is the same as given in the QEMU documentation [0]
and has been verified to affect entropy availability in a guest by
measuring /dev/random throughput. 1 KiB/s is enough to avoid any
early-boot entropy shortages, and already has a significant impact on
/dev/random availability in the guest.
[0] https://wiki.qemu.org/Features/VirtIORNG
[1] https://git.qemu.org/?p=qemu.git;a=blob;f=crypto/random-platform.c;h=f92f96987d7d262047c7604b169a7fdf11236107;hb=HEAD
[2] https://lwn.net/Articles/261804/
[3] https://lwn.net/Articles/808575/
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
2020-02-20 18:10:44 +01:00
rng0 = > {
type = > 'string' ,
format = > $ rng_fmt ,
description = > "Configure a VirtIO-based Random Number Generator." ,
optional = > 1 ,
} ,
2021-10-21 09:10:49 +02:00
meta = > {
type = > 'string' ,
format = > $ meta_info_fmt ,
description = > "Some (read-only) meta-information about this guest." ,
optional = > 1 ,
} ,
2022-06-30 17:09:45 -07:00
affinity = > {
type = > 'string' , format = > 'pve-cpuset' ,
2022-11-15 08:33:04 +01:00
description = > "List of host cores used to execute guest processes, for example: 0,5,8-11" ,
2022-06-30 17:09:45 -07:00
optional = > 1 ,
} ,
2015-08-17 15:46:07 +02:00
} ;
2019-02-07 15:12:35 +01:00
# Property-string format of the 'cicustom' option (registered below as
# 'pve-qm-cicustom'). Each sub-key optionally names a volume holding a custom
# cloud-init data file of the respective kind.
my $cicustom_fmt = {
    meta => {
        type => 'string',
        optional => 1,
        # Both fragments use single quotes; mixing in a double quote here would
        # embed the quote, the line break and the concatenation dot into the
        # description text itself.
        description => 'Specify a custom file containing all meta data passed to the VM via'
            . ' cloud-init. This is provider specific meaning configdrive2 and nocloud differ.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    network => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all network data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    user => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all user data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    vendor => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all vendor data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
};
PVE::JSONSchema:: register_format ( 'pve-qm-cicustom' , $ cicustom_fmt ) ;
2015-08-17 15:46:07 +02:00
# Schema of the cloud-init related VM config options. The entries are merged
# into $confdesc further down, together with the per-interface 'ipconfig$i'
# options.
my $confdesc_cloudinit = {
    citype => {
        optional => 1,
        type => 'string',
        description => 'Specifies the cloud-init configuration format. The default depends on the'
            . ' configured operating system type (`ostype`). We use the `nocloud` format for Linux,'
            . ' and `configdrive2` for windows.',
        enum => ['configdrive2', 'nocloud', 'opennebula'],
    },
    ciuser => {
        optional => 1,
        type => 'string',
        description => "cloud-init: User name to change ssh keys and password for instead of the"
            . " image's configured default user.",
    },
    cipassword => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Password to assign the user. Using this is generally not'
            . ' recommended. Use ssh keys instead. Also note that older cloud-init versions do not'
            . ' support hashed passwords.',
    },
    cicustom => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Specify custom files to replace the automatically generated'
            . ' ones at start.',
        format => 'pve-qm-cicustom',
    },
    searchdomain => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Sets DNS search domains for a container. Create will'
            . ' automatically use the setting from the host if neither searchdomain nor nameserver'
            . ' are set.',
    },
    nameserver => {
        optional => 1,
        type => 'string', format => 'address-list',
        description => 'cloud-init: Sets DNS server IP address for a container. Create will'
            . ' automatically use the setting from the host if neither searchdomain nor nameserver'
            . ' are set.',
    },
    sshkeys => {
        optional => 1,
        type => 'string',
        format => 'urlencoded',
        description => "cloud-init: Setup public SSH keys (one key per line, OpenSSH format).",
    },
};
# what about other qemu settings ?
#cpu => 'string',
#machine => 'string',
#fda => 'file',
#fdb => 'file',
#mtdblock => 'file',
#sd => 'file',
#pflash => 'file',
#snapshot => 'bool',
#bootp => 'file',
##tftp => 'dir',
##smb => 'dir',
#kernel => 'file',
#append => 'string',
#initrd => 'file',
##soundhw => 'string',
# Expose every option defined in $confdesc so far as a standard option named
# 'pve-qm-<key>'.
for my $k (keys %$confdesc) {
    PVE::JSONSchema::register_standard_option("pve-qm-$k", $confdesc->{$k});
}
2022-11-10 15:35:57 +01:00
# Upper bounds for the numbered config keys generated by the loops further
# down in this file (keys run from 0 to MAX - 1).
# Number of 'usb$i' entries.
my $ MAX_USB_DEVICES = 14 ;
2012-08-20 11:10:24 +02:00
# Number of 'net$i' and 'ipconfig$i' entries.
my $ MAX_NETS = 32 ;
2011-09-11 09:00:00 +02:00
# Number of 'serial$i' entries.
my $ MAX_SERIAL_PORTS = 4 ;
2011-09-11 09:00:01 +02:00
# Number of 'parallel$i' entries.
my $ MAX_PARALLEL_PORTS = 3 ;
add custom numa topology support
numaX: cpus=<id[-id]>,memory=<mb>[[,hostnodes=<id[-id]>][,policy=<preferred|bind|interleave>]]
example:
-------
sockets:4
cores:2
memory:4096
numa: 1
numa0: cpus=0-1,memory=1024,hostnodes=0-1,policy=interleave
numa1: cpus=2-3,memory=3072,hostnodes=2,policy=bind
qemu command line
-----------------
-object memory-backend-ram,size=1024M,policy=interleave,host-nodes=0-1,id=ram-node0
-numa node,nodeid=0,cpus=0-1,memdev=ram-node0
-object memory-backend-ram,size=3072M,policy=bind,host-nodes=2,id=ram-node1
-numa node,nodeid=1,cpus=2-3,memdev=ram-node1
Signed-off-by: Alexandre Derumier <aderumier@odiso.com>
2014-12-03 16:23:48 +01:00
my $ MAX_NUMA = 8 ;
2016-03-30 12:20:10 +02:00
my $ numa_fmt = {
cpus = > {
type = > "string" ,
pattern = > qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/ ,
2016-05-19 13:13:25 +02:00
description = > "CPUs accessing this NUMA node." ,
2016-03-30 12:20:10 +02:00
format_description = > "id[-id];..." ,
} ,
memory = > {
type = > "number" ,
2016-05-19 13:13:25 +02:00
description = > "Amount of memory this NUMA node provides." ,
2016-03-30 12:20:10 +02:00
optional = > 1 ,
} ,
hostnodes = > {
type = > "string" ,
pattern = > qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/ ,
2016-05-19 13:13:25 +02:00
description = > "Host NUMA nodes to use." ,
2016-03-30 12:20:10 +02:00
format_description = > "id[-id];..." ,
optional = > 1 ,
} ,
policy = > {
type = > 'string' ,
enum = > [ qw( preferred bind interleave ) ] ,
2016-05-19 13:13:25 +02:00
description = > "NUMA allocation policy." ,
2016-03-30 12:20:10 +02:00
optional = > 1 ,
} ,
} ;
PVE::JSONSchema:: register_format ( 'pve-qm-numanode' , $ numa_fmt ) ;
add custom numa topology support
numaX: cpus=<id[-id]>,memory=<mb>[[,hostnodes=<id[-id]>][,policy=<preferred|bind|interleave>]]
example:
-------
sockets:4
cores:2
memory:4096
numa: 1
numa0: cpus=0-1,memory=1024,hostnodes=0-1,policy=interleave
numa1: cpus=2-3,memory=3072,hostnodes=2,policy=bind
qemu command line
-----------------
-object memory-backend-ram,size=1024M,policy=interleave,host-nodes=0-1,id=ram-node0
-numa node,nodeid=0,cpus=0-1,memdev=ram-node0
-object memory-backend-ram,size=3072M,policy=bind,host-nodes=2,id=ram-node1
-numa node,nodeid=1,cpus=2-3,memdev=ram-node1
Signed-off-by: Alexandre Derumier <aderumier@odiso.com>
2014-12-03 16:23:48 +01:00
my $ numadesc = {
optional = > 1 ,
2016-03-30 12:20:10 +02:00
type = > 'string' , format = > $ numa_fmt ,
2016-05-19 13:13:25 +02:00
description = > "NUMA topology." ,
add custom numa topology support
numaX: cpus=<id[-id]>,memory=<mb>[[,hostnodes=<id[-id]>][,policy=<preferred|bind|interleave>]]
example:
-------
sockets:4
cores:2
memory:4096
numa: 1
numa0: cpus=0-1,memory=1024,hostnodes=0-1,policy=interleave
numa1: cpus=2-3,memory=3072,hostnodes=2,policy=bind
qemu command line
-----------------
-object memory-backend-ram,size=1024M,policy=interleave,host-nodes=0-1,id=ram-node0
-numa node,nodeid=0,cpus=0-1,memdev=ram-node0
-object memory-backend-ram,size=3072M,policy=bind,host-nodes=2,id=ram-node1
-numa node,nodeid=1,cpus=2-3,memdev=ram-node1
Signed-off-by: Alexandre Derumier <aderumier@odiso.com>
2014-12-03 16:23:48 +01:00
} ;
PVE::JSONSchema:: register_standard_option ( "pve-qm-numanode" , $ numadesc ) ;
# Add the config keys numa0 .. numa<$MAX_NUMA - 1>; all share one descriptor.
for my $i (0 .. $MAX_NUMA - 1) {
    $confdesc->{"numa$i"} = $numadesc;
}
2011-08-23 07:47:04 +02:00
2021-07-16 18:57:38 +02:00
my $ nic_model_list = [
'e1000' ,
'e1000-82540em' ,
'e1000-82544gc' ,
'e1000-82545em' ,
2021-07-16 19:09:57 +02:00
'e1000e' ,
2021-07-16 18:57:38 +02:00
'i82551' ,
'i82557b' ,
'i82559er' ,
'ne2k_isa' ,
'ne2k_pci' ,
'pcnet' ,
'rtl8139' ,
'virtio' ,
'vmxnet3' ,
] ;
2011-09-15 09:11:27 +02:00
my $ nic_model_list_txt = join ( ' ' , sort @$ nic_model_list ) ;
2011-08-23 07:47:04 +02:00
2016-05-19 13:13:25 +02:00
my $ net_fmt_bridge_descr = << __EOD__ ;
Bridge to attach the network device to . The Proxmox VE standard bridge
is called 'vmbr0' .
If you do not specify a bridge , we create a kvm user ( NATed ) network
device , which provides DHCP and DNS services . The following addresses
are used:
10.0 .2 .2 Gateway
10.0 .2 .3 DNS Server
10.0 .2 .4 SMB Server
The DHCP server assign addresses to the guest starting from 10.0 .2 .15 .
__EOD__
2016-03-30 12:20:11 +02:00
# Property-string format of the 'net$i' options (used by $netdesc below).
# 'model' is the default key; in addition every NIC model name is accepted as
# a key aliasing 'model', with its value aliased to 'macaddr'.
my $net_fmt = {
    macaddr => get_standard_option('mac-addr', {
        description => "MAC address. That address must be unique within your network. This is"
            . " automatically generated if not specified.",
    }),
    model => {
        type => 'string',
        description => "Network Card Model. The 'virtio' model provides the best performance with"
            . " very low CPU overhead. If your guest does not support this driver, it is usually"
            . " best to use 'e1000'.",
        enum => $nic_model_list,
        default_key => 1,
    },
    # One alias entry per supported NIC model.
    (map { $_ => { keyAlias => 'model', alias => 'macaddr' } } @$nic_model_list),
    bridge => get_standard_option('pve-bridge-id', {
        description => $net_fmt_bridge_descr,
        optional => 1,
    }),
    queues => {
        type => 'integer',
        minimum => 0, maximum => 64,
        description => 'Number of packet queues to be used on the device.',
        optional => 1,
    },
    rate => {
        type => 'number',
        minimum => 0,
        description => "Rate limit in mbps (megabytes per second) as floating point number.",
        optional => 1,
    },
    tag => {
        type => 'integer',
        minimum => 1, maximum => 4094,
        description => 'VLAN tag to apply to packets on this interface.',
        optional => 1,
    },
    trunks => {
        type => 'string',
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
        description => 'VLAN trunks to pass through this interface.',
        format_description => 'vlanid[;vlanid...]',
        optional => 1,
    },
    firewall => {
        type => 'boolean',
        description => 'Whether this interface should be protected by the firewall.',
        optional => 1,
    },
    link_down => {
        type => 'boolean',
        description => 'Whether this interface should be disconnected (like pulling the plug).',
        optional => 1,
    },
    mtu => {
        type => 'integer',
        minimum => 1, maximum => 65520,
        description => "Force MTU, for VirtIO only. Set to '1' to use the bridge MTU",
        optional => 1,
    },
};
2016-05-19 13:13:25 +02:00
2011-08-23 07:47:04 +02:00
my $ netdesc = {
optional = > 1 ,
2016-05-11 10:11:49 +02:00
type = > 'string' , format = > $ net_fmt ,
2016-05-19 13:13:25 +02:00
description = > "Specify network devices." ,
2011-08-23 07:47:04 +02:00
} ;
2016-05-19 13:13:25 +02:00
2011-08-23 07:47:04 +02:00
PVE::JSONSchema:: register_standard_option ( "pve-qm-net" , $ netdesc ) ;
2015-06-16 14:26:43 +02:00
my $ ipconfig_fmt = {
ip = > {
type = > 'string' ,
format = > 'pve-ipv4-config' ,
format_description = > 'IPv4Format/CIDR' ,
description = > 'IPv4 address in CIDR format.' ,
optional = > 1 ,
default = > 'dhcp' ,
} ,
gw = > {
type = > 'string' ,
format = > 'ipv4' ,
format_description = > 'GatewayIPv4' ,
description = > 'Default gateway for IPv4 traffic.' ,
optional = > 1 ,
requires = > 'ip' ,
} ,
ip6 = > {
type = > 'string' ,
format = > 'pve-ipv6-config' ,
format_description = > 'IPv6Format/CIDR' ,
description = > 'IPv6 address in CIDR format.' ,
optional = > 1 ,
default = > 'dhcp' ,
} ,
gw6 = > {
type = > 'string' ,
format = > 'ipv6' ,
format_description = > 'GatewayIPv6' ,
description = > 'Default gateway for IPv6 traffic.' ,
optional = > 1 ,
requires = > 'ip6' ,
} ,
} ;
PVE::JSONSchema:: register_format ( 'pve-qm-ipconfig' , $ ipconfig_fmt ) ;
my $ ipconfigdesc = {
optional = > 1 ,
type = > 'string' , format = > 'pve-qm-ipconfig' ,
description = > << 'EODESCR' ,
cloud - init: Specify IP addresses and gateways for the corresponding interface .
IP addresses use CIDR notation , gateways are optional but need an IP of the same type specified .
2020-09-02 14:07:02 +02:00
The special string 'dhcp' can be used for IP addresses to use DHCP , in which case no explicit
gateway should be provided .
2021-03-29 14:07:15 +02:00
For IPv6 the special string 'auto' can be used to use stateless autoconfiguration . This requires
cloud - init 19.4 or newer .
2015-06-16 14:26:43 +02:00
2020-09-02 14:07:02 +02:00
If cloud - init is enabled and neither an IPv4 nor an IPv6 address is specified , it defaults to using
dhcp on IPv4 .
2015-06-16 14:26:43 +02:00
EODESCR
} ;
# Register the cloud-init IP configuration descriptor under its own name; the
# network device descriptor is already registered separately as 'pve-qm-net'.
PVE::JSONSchema::register_standard_option("pve-qm-ipconfig", $ipconfigdesc);
2011-08-23 07:47:04 +02:00
# Add net0 .. net<$MAX_NETS - 1> to the main schema and the matching
# ipconfig0 .. ipconfig<$MAX_NETS - 1> entries to the cloud-init schema.
for my $i (0 .. $MAX_NETS - 1) {
    $confdesc->{"net$i"} = $netdesc;
    $confdesc_cloudinit->{"ipconfig$i"} = $ipconfigdesc;
}

# Copy every cloud-init option into the main schema.
$confdesc->{$_} = $confdesc_cloudinit->{$_} for keys %$confdesc_cloudinit;
2022-06-30 17:09:45 -07:00
PVE::JSONSchema:: register_format ( 'pve-cpuset' , \ & pve_verify_cpuset ) ;
# Format verifier for 'pve-cpuset'.
#
# Parses $set_text with PVE::CpuSet::parse_cpuset and returns the short string
# form of a PVE::CpuSet built from the parsed members. If parsing fails it
# returns nothing when $noerr is set, and dies otherwise.
sub pve_verify_cpuset {
    my ($set_text, $noerr) = @_;

    # Only the second return value (the members) is needed here.
    my (undef, $members) = eval { PVE::CpuSet::parse_cpuset($set_text) };
    if ($@) {
        die "unable to parse cpuset option\n" if !$noerr;
        return;
    }

    my $cpuset = PVE::CpuSet->new($members);
    return $cpuset->short_string();
}
2016-04-21 11:40:24 +02:00
PVE::JSONSchema:: register_format ( 'pve-volume-id-or-qm-path' , \ & verify_volume_id_or_qm_path ) ;
# Format verifier for 'pve-volume-id-or-qm-path'.
#
# Accepts the keywords 'none' and 'cdrom' unchanged; anything else is handed
# to verify_volume_id_or_absolute_path() together with $noerr.
sub verify_volume_id_or_qm_path {
    my ($volid, $noerr) = @_;

    for my $keyword ('none', 'cdrom') {
        return $volid if $volid eq $keyword;
    }

    return verify_volume_id_or_absolute_path($volid, $noerr);
}
PVE::JSONSchema:: register_format ( 'pve-volume-id-or-absolute-path' , \ & verify_volume_id_or_absolute_path ) ;
# Format verifier for 'pve-volume-id-or-absolute-path'.
#
# A value starting with '/' is returned unchanged. Any other value is checked
# against the 'pve-volume-id' format and the result of that check is returned.
# If the check fails, the error is re-thrown unless $noerr is set, in which
# case nothing is returned.
sub verify_volume_id_or_absolute_path {
    my ($volid, $noerr) = @_;

    return $volid if $volid =~ m|^/|;

    my $checked = eval { PVE::JSONSchema::check_format('pve-volume-id', $volid, '') };
    if ($@) {
        die $@ if !$noerr;
        return;
    }

    return $checked;
}
2016-03-30 12:20:07 +02:00
my $ usb_fmt = {
2016-02-11 15:49:09 +01:00
host = > {
default_key = > 1 ,
type = > 'string' , format = > 'pve-qm-usb-device' ,
format_description = > 'HOSTUSBDEVICE|spice' ,
2016-05-19 13:13:25 +02:00
description = > << EODESCR ,
The Host USB device or port or the value 'spice' . HOSTUSBDEVICE syntax is:
'bus-port(.port)*' ( decimal numbers ) or
'vendor_id:product_id' ( hexadeciaml numbers ) or
'spice'
You can use the 'lsusb -t' command to list existing usb devices .
2020-09-02 14:07:02 +02:00
NOTE: This option allows direct access to host hardware . So it is no longer possible to migrate such
machines - use with special care .
2016-05-19 13:13:25 +02:00
The value 'spice' can be used to add a usb redirection devices for spice .
EODESCR
2016-02-11 15:49:09 +01:00
} ,
usb3 = > {
optional = > 1 ,
type = > 'boolean' ,
2022-11-10 15:35:56 +01:00
description = > "Specifies whether if given host option is a USB3 device or port."
. " For modern guests (machine version >= 7.1 and ostype l26 and windows > 7), this flag"
. " is irrelevant (all devices are plugged into a xhci controller)." ,
2016-05-19 13:13:25 +02:00
default = > 0 ,
2016-02-11 15:49:09 +01:00
} ,
} ;
2011-08-23 07:47:04 +02:00
my $ usbdesc = {
optional = > 1 ,
2016-03-30 12:20:07 +02:00
type = > 'string' , format = > $ usb_fmt ,
2022-11-10 15:35:57 +01:00
description = > "Configure an USB device (n is 0 to 4, for machine version >= 7.1 and ostype"
. " l26 or windows > 7, n can be up to 14)." ,
2011-08-23 07:47:04 +02:00
} ;
PVE::JSONSchema:: register_standard_option ( "pve-qm-usb" , $ usbdesc ) ;
2011-09-11 09:00:00 +02:00
my $ serialdesc = {
optional = > 1 ,
2011-09-12 07:44:02 +02:00
type = > 'string' ,
2014-12-01 09:47:36 +01:00
pattern = > '(/dev/.+|socket)' ,
2016-05-19 13:13:25 +02:00
description = > "Create a serial device inside the VM (n is 0 to 3)" ,
verbose_description = > << EODESCR ,
Create a serial device inside the VM ( n is 0 to 3 ) , and pass through a
host serial device ( i . e . /dev/ ttyS0 ) , or create a unix socket on the
host side ( use 'qm terminal' to open a terminal connection ) .
2011-09-11 09:00:00 +02:00
2020-09-02 14:07:02 +02:00
NOTE: If you pass through a host serial device , it is no longer possible to migrate such machines -
use with special care .
2011-09-11 09:00:00 +02:00
2016-05-19 13:13:25 +02:00
CAUTION: Experimental ! User reported problems with this option .
2011-09-11 09:00:00 +02:00
EODESCR
} ;
2011-09-11 09:00:01 +02:00
# Schema entry describing a single parallel port option of a VM config.
# The value is a plain string restricted by 'pattern' to host parallel port
# device nodes (/dev/parportN or /dev/usb/lpN).
# 'description' is the short text, 'verbose_description' the long form.
my $ paralleldesc = {
optional = > 1 ,
2011-09-12 07:44:02 +02:00
type = > 'string' ,
2013-08-14 12:18:54 +02:00
pattern = > '/dev/parport\d+|/dev/usb/lp\d+' ,
2016-05-19 13:13:25 +02:00
description = > "Map host parallel devices (n is 0 to 2)." ,
verbose_description = > << EODESCR ,
2011-09-12 12:26:00 +02:00
Map host parallel devices ( n is 0 to 2 ) .
2011-09-11 09:00:01 +02:00
2020-09-02 14:07:02 +02:00
NOTE: This option allows direct access to host hardware . So it is no longer possible to migrate such
machines - use with special care .
2011-09-11 09:00:01 +02:00
2016-05-19 13:13:25 +02:00
CAUTION: Experimental ! User reported problems with this option .
2011-09-11 09:00:01 +02:00
EODESCR
} ;
# Populate $confdesc with one entry per indexed device option. All slots of a
# device class share the same schema hash reference.
# NOTE(review): the ranges assume the MAX_* limits are non-negative integers.
$confdesc->{"parallel$_"} = $paralleldesc for 0 .. $MAX_PARALLEL_PORTS - 1;

$confdesc->{"serial$_"} = $serialdesc for 0 .. $MAX_SERIAL_PORTS - 1;

$confdesc->{"hostpci$_"} = $PVE::QemuServer::PCI::hostpcidesc
    for 0 .. $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES - 1;

# drive options are taken over verbatim from the Drive module's description hash
$confdesc->{$_} = $PVE::QemuServer::Drive::drivedesc_hash->{$_}
    for keys %{$PVE::QemuServer::Drive::drivedesc_hash};

$confdesc->{"usb$_"} = $usbdesc for 0 .. $MAX_USB_DEVICES - 1;
2020-10-06 15:32:14 +02:00
# Property-string format of the boot option, registered below as 'pve-qm-boot'.
# It has two keys:
#   legacy - default key; a string of up to four letters out of [acdn]
#   order  - a list validated by the 'pve-qm-bootdev-list' format
# The per-device verifier for 'pve-qm-bootdev' (verify_bootdev) is registered
# right after the format itself.
my $ boot_fmt = {
legacy = > {
optional = > 1 ,
default_key = > 1 ,
type = > 'string' ,
description = > "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n)."
. " Deprecated, use 'order=' instead." ,
pattern = > '[acdn]{1,4}' ,
format_description = > "[acdn]{1,4}" ,
# note: this is also the fallback if boot: is not given at all
default = > 'cdn' ,
} ,
order = > {
optional = > 1 ,
type = > 'string' ,
format = > 'pve-qm-bootdev-list' ,
format_description = > "device[;device...]" ,
description = > << EODESC ,
The guest will attempt to boot from devices in the order they appear here .
Disks , optical drives and passed - through storage USB devices will be directly
booted from , NICs will load PXE , and PCIe devices will either behave like disks
( e . g . NVMe ) or load an option ROM ( e . g . RAID controller , hardware NIC ) .
Note that only devices in this list will be marked as bootable and thus loaded
by the guest firmware ( BIOS / UEFI ) . If you require multiple disks for booting
( e . g . software - raid ) , you need to specify all of them here .
Overrides the deprecated 'legacy=[acdn]*' value when given .
EODESC
} ,
} ;
PVE::JSONSchema:: register_format ( 'pve-qm-boot' , $ boot_fmt ) ;
PVE::JSONSchema:: register_format ( 'pve-qm-bootdev' , \ & verify_bootdev ) ;
# Format verifier for a single boot device name.
#
# Accepts any valid drive name except efidisk*/tpmstate*, and any configured
# netN, usbN or hostpciN key. Returns $dev when it is acceptable. Otherwise it
# returns nothing if $noerr is set, or dies with "invalid boot device".
sub verify_bootdev {
    my ($dev, $noerr) = @_;

    # efidisk and tpmstate volumes are never accepted as boot devices
    my $is_state_volume = $dev =~ m/^efidisk/ || $dev =~ m/^tpmstate/;
    if (PVE::QemuServer::Drive::is_valid_drivename($dev) && !$is_state_volume) {
        return $dev;
    }

    # non-drive devices: the key must be "<base><number>" and known to $confdesc
    for my $base (qw(net usb hostpci)) {
        next if $dev !~ m/^$base\d+$/;
        return $dev if $confdesc->{$dev};
    }

    die "invalid boot device '$dev'\n" if !$noerr;
    return;
}
# Render a list of boot devices as a 'boot' property string ("order=a;b;c").
#
# $devs is an array reference of device names. An empty list yields "".
sub print_bootorder {
    my ($devs) = @_;

    if (!@$devs) {
        return "";
    }

    my $order = join(';', @$devs);
    return PVE::JSONSchema::print_property_string({ order => $order }, $boot_fmt);
}
2011-08-23 07:47:04 +02:00
# Cached KVM API version; 0 means "not queried yet".
my $kvm_api_version = 0;

# Return the KVM API version reported by /dev/kvm.
#
# The value is cached after the first successful query. Returns nothing when
# /dev/kvm cannot be opened.
sub kvm_version {
    if (!$kvm_api_version) {
        open(my $fh, '<', '/dev/kvm') or return;

        # 0xae00 => KVM_GET_API_VERSION
        $kvm_api_version = ioctl($fh, 0xae00, 0);

        close($fh);
    }

    return $kvm_api_version;
}
2019-08-13 15:19:07 +02:00
# Per-binary caches: the detected QEMU version string and the binary's mtime at
# the time of detection.
my $kvm_user_version = {};
my $kvm_mtime = {};

# Return the version of a QEMU binary as printed by "<binary> --version".
#
# $binary defaults to the command for the host architecture. The result is
# cached per binary and re-detected when the binary's mtime changes. If the
# version line cannot be parsed (or the command fails), 'unknown' is returned.
sub kvm_user_version {
    my ($binary) = @_;

    $binary //= get_command_for_arch(get_host_arch()); # get the native arch by default
    my $st = stat($binary);

    my $known_version = $kvm_user_version->{$binary};
    if ($known_version && ($kvm_mtime->{$binary} // -1) == $st->mtime) {
        return $known_version;
    }

    # reset the cache entry before probing, so a failed probe yields 'unknown'
    $kvm_user_version->{$binary} = 'unknown';
    $kvm_mtime->{$binary} = $st->mtime;

    my $parse_version_line = sub {
        my ($line) = @_;
        return if $line !~ m/^QEMU( PC)? emulator version (\d+\.\d+(\.\d+)?)(\.\d+)?[,\s]/;
        $kvm_user_version->{$binary} = $2;
    };

    # a failing command only produces a warning
    eval { run_command([$binary, '--version'], outfunc => $parse_version_line); };
    warn $@ if $@;

    return $kvm_user_version->{$binary};
}
2020-09-02 14:07:02 +02:00
# Wrapper around PVE::QemuServer::Machine::extract_version() that falls back to
# the installed QEMU's version (kvm_user_version()) when $version is undefined.
my sub extract_version {
    my ($machine_type, $version) = @_;

    $version //= kvm_user_version();

    return PVE::QemuServer::Machine::extract_version($machine_type, $version);
}
2011-08-23 07:47:04 +02:00
2018-12-20 10:44:13 +01:00
# True if /dev/vhost-net exists and is a character device.
sub kernel_has_vhost_net {
    my $is_char_device = -c '/dev/vhost-net';
    return $is_char_device;
}
2011-08-23 07:47:04 +02:00
# True if $key has a defined entry in $confdesc.
sub option_exists {
    my ($key) = @_;

    return defined($confdesc->{$key});
}
2011-08-23 07:47:04 +02:00
# Cached result of get_cdrom_path(); undef until the first lookup.
my $cdrom_path;

# Return the first of /dev/cdrom, /dev/cdrom1, /dev/cdrom2 that is a symlink.
#
# If none exists, a warning is logged and '' is returned. The result (including
# the empty string) is cached for subsequent calls.
sub get_cdrom_path {
    return $cdrom_path if defined($cdrom_path);

    for my $suffix ('', '1', '2') {
        my $candidate = "/dev/cdrom$suffix";
        next if !-l $candidate;

        $cdrom_path = $candidate;
        last;
    }

    if (!defined($cdrom_path)) {
        log_warn("no physical CD-ROM available, ignoring");
        $cdrom_path = '';
    }

    return $cdrom_path;
}
# Resolve the 'file' value of a CD-ROM drive to a path.
#
#   'cdrom'        -> result of get_cdrom_path()
#   'none'         -> ''
#   absolute path  -> returned unchanged
#   anything else  -> resolved via PVE::Storage::path()
sub get_iso_path {
    my ($storecfg, $vmid, $cdrom) = @_;

    return get_cdrom_path() if $cdrom eq 'cdrom';
    return '' if $cdrom eq 'none';
    return $cdrom if $cdrom =~ m|^/|;

    return PVE::Storage::path($storecfg, $cdrom);
}
# Try to convert old style file names to volume IDs.
#
# 'none', 'cdrom', /dev/ paths and values that already look like
# "<storage>:<volume>" are returned unchanged. A bare file name is mapped to
# "local:iso/<file>" for cdrom media and to "local:<vmid>/<file>" otherwise.
# Any other value containing a '/' cannot be converted; nothing is returned.
sub filename_to_volume_id {
    my ($vmid, $file, $media) = @_;

    my $needs_no_conversion = $file eq 'none'
        || $file eq 'cdrom'
        || $file =~ m|^/dev/.+|
        || $file =~ m/^([^:]+):(.+)$/;
    return $file if $needs_no_conversion;

    return if $file =~ m|/|;

    my $subdir = ($media && $media eq 'cdrom') ? 'iso' : $vmid;
    return "local:$subdir/$file";
}
# Check that a volume's content type matches the drive's media type.
#
# Media 'disk' expects vtype 'images', media 'cdrom' expects vtype 'iso'.
# Nothing is checked when $media is unset. Raises a parameter exception for
# option $opt on mismatch; dies with "internal error" on an unknown media type.
sub verify_media_type {
    my ($opt, $vtype, $media) = @_;

    return if !$media;

    my %expected_vtype_for = (
        disk => 'images',
        cdrom => 'iso',
    );
    my $etype = $expected_vtype_for{$media};
    die "internal error" if !defined($etype);

    return if $vtype eq $etype;

    raise_param_exc({ $opt => "unexpected media type ($vtype != $etype)" });
}
# Normalize the 'file' and 'media' properties of a parsed drive (in place).
#
# A file value that is neither 'cdrom'/'none', a /dev/ path, a
# "<storage>:<volume>" ID nor purely numeric is treated as a filesystem path
# and converted to a volume ID. A parameter exception for $opt is raised when
# no storage can be associated with it. Media defaults to 'cdrom' for iso
# volumes and for the special files 'cdrom' and 'none'.
sub cleanup_drive_path {
    my ($opt, $storecfg, $drive) = @_;

    my $file = $drive->{file};
    my $is_fs_path = $file !~ m/^(cdrom|none)$/
        && $file !~ m|^/dev/.+|
        && $file !~ m/^([^:]+):(.+)$/
        && $file !~ m/^\d+$/;

    # try to convert filesystem paths to volume IDs
    if ($is_fs_path) {
        my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $file);
        if (!$vtype) {
            raise_param_exc({ $opt => "unable to associate path '$drive->{file}' to any storage" });
        }

        $drive->{media} = 'cdrom' if !$drive->{media} && $vtype eq 'iso';
        verify_media_type($opt, $vtype, $drive->{media});
        $drive->{file} = $volid;
    }

    $drive->{media} = 'cdrom' if !$drive->{media} && $drive->{file} =~ m/^(cdrom|none)$/;
}
2015-01-27 07:16:22 +01:00
# Parse the 'hotplug' option into a hash of enabled features.
#
# '0' yields an empty hash, '1' is replaced by the schema default
# ($confdesc->{hotplug}->{default}). Otherwise $data is split into a list of
# feature names. Dies on an unknown feature name.
sub parse_hotplug_features {
    my ($data) = @_;

    my $res = {};

    return $res if $data eq '0';

    if ($data eq '1') {
        $data = $confdesc->{hotplug}->{default};
    }

    for my $feature (PVE::Tools::split_list($data)) {
        my ($name) = $feature =~ m/^(network|disk|cpu|memory|usb|cloudinit)$/;
        die "invalid hotplug feature '$feature'\n" if !defined($name);

        $res->{$name} = 1;
    }

    return $res;
}
PVE::JSONSchema::register_format('pve-hotplug-features', \&pve_verify_hotplug_features);

# Format verifier for the 'pve-hotplug-features' format.
#
# Returns $value if parse_hotplug_features() accepts it. On an invalid value it
# returns nothing when $noerr is set, and dies otherwise.
#
# parse_hotplug_features() signals invalid input by dying, so the call is
# wrapped in eval. Without that, the $noerr request could never be honored.
sub pve_verify_hotplug_features {
    my ($value, $noerr) = @_;

    my $parsed = eval { parse_hotplug_features($value) };
    my $err = $@;

    return $value if $parsed;

    return if $noerr;

    # keep the parser's specific message for callers that want the exception
    die $err if $err;
    die "unable to parse hotplug option\n";
}
2012-03-19 10:32:52 +01:00
# Send a SCSI INQUIRY to an open device through the Linux SCSI generic (sg)
# ioctl interface.
#
# Parameters:
#   $fh    - open file handle of the device
#   $noerr - if true, return nothing on failure instead of dying
#
# Returns a hash reference with the keys type, removable, vendor, product and
# revision, decoded from the 36-byte inquiry response.
sub scsi_inquiry {
    my ($fh, $noerr) = @_;

    my $SG_IO = 0x2285;
    my $SG_GET_VERSION_NUM = 0x2282;

    # the sg interface must be present and recent enough
    my $versionbuf = "\x00" x 8;
    my $ret = ioctl($fh, $SG_GET_VERSION_NUM, $versionbuf);
    if (!$ret) {
        die "scsi ioctl SG_GET_VERSION_NUM failed - $!\n" if !$noerr;
        return;
    }
    my $version = unpack("I", $versionbuf);
    if ($version < 30000) {
        die "scsi generic interface too old\n" if !$noerr;
        return;
    }

    my $buf = "\x00" x 36;
    my $sensebuf = "\x00" x 8;
    # 6-byte command block: opcode 0x12, allocation length 36
    my $cmd = pack("C x3 C x1", 0x12, 36);

    # see /usr/include/scsi/sg.h
    my $sg_io_hdr_t = "i i C C s I P P P I I i P C C C C S S i I I";

    # NOTE(review): -3 and 6000 are presumably the sg.h "from device" transfer
    # direction and a timeout in milliseconds - confirm against sg.h
    my $packet = pack(
        $sg_io_hdr_t,
        ord('S'),
        -3,
        length($cmd),
        length($sensebuf),
        0,
        length($buf),
        $buf,
        $cmd,
        $sensebuf,
        6000,
    );

    $ret = ioctl($fh, $SG_IO, $packet);
    if (!$ret) {
        die "scsi ioctl SG_IO failed - $!\n" if !$noerr;
        return;
    }

    # fields 17 and 18 of the header are the two 'S' (unsigned short) status
    # fields; any non-zero value is treated as failure
    my @res = unpack($sg_io_hdr_t, $packet);
    if ($res[17] || $res[18]) {
        die "scsi ioctl SG_IO status error - $!\n" if !$noerr;
        return;
    }

    my $res = {};
    $res->@{qw(type removable vendor product revision)} = unpack("C C x6 A8 A16 A4", $buf);

    # reduce the raw bytes: bit 7 is the removable flag, the low 5 bits the type
    $res->{removable} = $res->{removable} & 128 ? 1 : 0;
    $res->{type} &= 0x1F;

    return $res;
}
# Probe whether $path is a device that answers a SCSI INQUIRY.
#
# Returns the hash reference produced by scsi_inquiry() on success. Returns
# nothing if the path cannot be opened read-write or the inquiry fails.
sub path_is_scsi {
    my ($path) = @_;

    # pass the mode separately, so that $path is always taken literally and is
    # never parsed for mode characters or surrounding whitespace
    my $fh = IO::File->new($path, '+<') || return;
    my $res = scsi_inquiry($fh, 1);
    close($fh);

    return $res;
}
2014-06-18 06:54:45 +02:00
# Build the QEMU device string for the USB tablet.
#
# The tablet is attached to the 'ehci' bus for q35 machines and on aarch64,
# and to 'uhci' otherwise.
sub print_tabletdevice_full {
    my ($conf, $arch) = @_;

    my $is_q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # we use uhci for old VMs because tablet driver was buggy in older qemu
    my $usbbus = ($is_q35 || $arch eq 'aarch64') ? 'ehci' : 'uhci';

    return "usb-tablet,id=tablet,bus=$usbbus.0,port=1";
}
2018-11-12 14:10:42 +01:00
# Build the QEMU device string for the USB keyboard.
#
# Only aarch64 gets one; for every other architecture nothing is returned.
sub print_keyboarddevice_full {
    my ($conf, $arch) = @_;

    if ($arch eq 'aarch64') {
        return "usb-kbd,id=keyboard,bus=ehci.0,port=2";
    }

    return;
}
2021-04-06 10:12:08 +02:00
# Return the drive's config key, i.e. its interface name directly followed by
# its index (e.g. interface 'scsi' and index 3 give "scsi3").
my sub get_drive_id {
    my ($drive) = @_;

    return $drive->{interface} . $drive->{index};
}
2011-09-07 15:34:38 +02:00
# Build the QEMU '-device' property string for a drive.
#
# Supported interfaces are virtio, scsi, ide and sata. 'usb' and anything else
# die. The optional 'bootindex' and (URI-unescaped) 'serial' properties are
# appended for every interface.
sub print_drivedevice_full {
    my ($storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type) = @_;

    my $device = '';

    my $drive_id = get_drive_id($drive);
    my $interface = $drive->{interface};

    if ($interface eq 'virtio') {
        my $pciaddr = print_pci_addr("$drive_id", $bridges, $arch, $machine_type);
        $device = "virtio-blk-pci,drive=drive-$drive_id,id=${drive_id}${pciaddr}";
        $device .= ",iothread=iothread-$drive_id" if $drive->{iothread};
    } elsif ($interface eq 'scsi') {
        my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
        my $unit = $drive->{index} % $maxdev;

        my $devicetype = 'hd';
        if (drive_is_cdrom($drive)) {
            $devicetype = 'cd';
        } else {
            my $path;
            if ($drive->{file} =~ m|^/|) {
                # absolute path: ask the device itself what it is
                $path = $drive->{file};
                my $info = path_is_scsi($path);
                if ($info && $info->{type} == 0 && $drive->{scsiblock}) {
                    $devicetype = 'block';
                } elsif ($info && $info->{type} == 1) { # tape
                    $devicetype = 'generic';
                }
            } else {
                $path = PVE::Storage::path($storecfg, $drive->{file});
            }

            # for compatibility only, we prefer scsi-hd (#2408, #2355, #2380)
            my $version = extract_version($machine_type, kvm_user_version());
            my $is_iscsi = $path =~ m/^iscsi\:\/\//;
            $devicetype = 'generic' if $is_iscsi && !min_version($version, 4, 1);
        }

        my $scsihw = $conf->{scsihw};
        my $addressed_by_unit = !$scsihw || $scsihw =~ m/^lsi/ || $scsihw eq 'pvscsi';

        $device = "scsi-$devicetype,bus=$controller_prefix$controller.0";
        if ($addressed_by_unit) {
            $device .= ",scsi-id=$unit";
        } else {
            $device .= ",channel=0,scsi-id=0,lun=$drive->{index}";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($drive->{ssd} && ($devicetype eq 'block' || $devicetype eq 'hd')) {
            $device .= ",rotation_rate=1";
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};
    } elsif ($interface eq 'ide' || $interface eq 'sata') {
        my $is_sata = $interface eq 'sata';
        my $maxdev = $is_sata ? $PVE::QemuServer::Drive::MAX_SATA_DISKS : 2;
        my $controller = int($drive->{index} / $maxdev);
        my $unit = $drive->{index} % $maxdev;

        my $is_cdrom = $drive->{media} && $drive->{media} eq 'cdrom';
        my $devicetype = $is_cdrom ? "cd" : "hd";

        $device = "ide-$devicetype";
        if ($is_sata) {
            $device .= ",bus=ahci$controller.$unit";
        } else {
            $device .= ",bus=ide.$controller,unit=$unit";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        # model and rotation rate are only set for hard disks
        if ($devicetype eq 'hd') {
            if (my $model = $drive->{model}) {
                $model = URI::Escape::uri_unescape($model);
                $device .= ",model=$model";
            }
            $device .= ",rotation_rate=1" if $drive->{ssd};
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};
    } elsif ($interface eq 'usb') {
        die "implement me";
        # -device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0
    } else {
        die "unsupported interface type";
    }

    $device .= ",bootindex=$drive->{bootindex}" if $drive->{bootindex};

    if (my $serial = $drive->{serial}) {
        $serial = URI::Escape::uri_unescape($serial);
        $device .= ",serial=$serial";
    }

    return $device;
}
2014-05-13 03:10:40 +02:00
# Read the iSCSI initiator name from /etc/iscsi/initiatorname.iscsi.
#
# Returns the value of the first "InitiatorName = ..." line, undef if the file
# contains no such line, and nothing if the file cannot be opened.
sub get_initiator_name {
    my $fh = IO::File->new('/etc/iscsi/initiatorname.iscsi') || return;

    my $initiator;
    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\s*InitiatorName\s*=\s*([\.\-:\w]+)/) {
            $initiator = $1;
            last;
        }
    }
    $fh->close();

    return $initiator;
}
2023-02-10 15:19:10 +01:00
# Decide whether io_uring may be used as default aio mode for a storage.
#
# Returns 1 if allowed (always the case without a storage config), nothing if
# the storage type is known to be problematic.
my sub storage_allows_io_uring_default {
    my ($scfg, $cache_direct) = @_;

    if ($scfg) {
        my $type = $scfg->{type};

        # io_uring with cache mode writeback or writethrough on krbd will hang...
        return if $type eq 'rbd' && $scfg->{krbd} && !$cache_direct;

        # io_uring with cache mode writeback or writethrough on LVM will hang, without cache only
        # sometimes, just plain disable...
        return if $type eq 'lvm';

        # io_uring causes problems when used with CIFS since kernel 5.15
        # Some discussion: https://www.spinics.net/lists/linux-cifs/msg26734.html
        return if $type eq 'cifs';
    }

    return 1;
}
2023-02-10 15:19:11 +01:00
# Tell whether a drive ends up using a direct (O_DIRECT style) cache mode.
#
# With an explicit cache setting, the result is true for 'off', 'none' and
# 'directsync'. Without one, it is true unless the drive is a CD-ROM or lives
# on a btrfs storage that does not have 'nocow' set.
my sub drive_uses_cache_direct {
    my ($drive, $scfg) = @_;

    if (my $cache_mode = $drive->{cache}) {
        my $is_direct = $cache_mode =~ /^(?:off|none|directsync)$/;
        return $is_direct;
    }

    return 0 if drive_is_cdrom($drive);

    my $is_cow_btrfs = $scfg && $scfg->{type} eq 'btrfs' && !$scfg->{nocow};
    return $is_cow_btrfs ? 0 : 1;
}
2020-03-02 11:33:45 +01:00
# Build the QEMU '-drive' property string for a drive.
#
# Parameters:
#   $storecfg - storage configuration, used to resolve volume IDs
#   $vmid     - VM ID, passed on when resolving CD-ROM paths
#   $drive    - parsed drive hash
#   $pbs_name - optional; if set, the drive is set up as an 'alloc-track' node
#               backed by the block node of that name
#   $io_uring - if true, io_uring may be chosen as default aio mode
#
# Dies if a CD-ROM drive is combined with $pbs_name, or if no format can be
# determined for a $pbs_name backed drive.
sub print_drive_commandline_full {
    my ($storecfg, $vmid, $drive, $pbs_name, $io_uring) = @_;

    my $volid = $drive->{file};
    my $format = $drive->{format};
    my $drive_id = get_drive_id($drive);

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
    my $scfg = $storeid ? PVE::Storage::storage_config($storecfg, $storeid) : undef;

    my $path;
    if (drive_is_cdrom($drive)) {
        $path = get_iso_path($storecfg, $vmid, $volid);
        die "$drive_id: cannot back cdrom drive with PBS snapshot\n" if $pbs_name;
    } elsif ($storeid) {
        $path = PVE::Storage::path($storecfg, $volid);
        $format //= qemu_img_format($scfg, $volname);
    } else {
        $path = $volid;
        $format //= "raw";
    }

    my $is_rbd = $path =~ m/^rbd:/;

    # options that are passed through to QEMU under the same name
    my $opts = '';
    for my $name (qw(heads secs cyls trans media cache rerror werror aio discard)) {
        my $value = $drive->{$name};
        $opts .= ",$name=$value" if defined($value);
    }

    # snapshot only accepts on|off
    if (defined($drive->{snapshot})) {
        $opts .= ",snapshot=" . ($drive->{snapshot} ? 'on' : 'off');
    }

    # ro maps to QEMUs `readonly`, which accepts `on` or `off` only
    if (defined($drive->{ro})) {
        my $readonly = $drive->{ro} ? 'on' : 'off';
        $opts .= ",readonly=$readonly";
    }

    # throttling limits: [config key template, QEMU option template, is given in MiB/s]
    my @throttle_limits = (
        ['mbps%s', 'bps%s', 1],
        ['mbps%s_max', 'bps%s-max', 1],
        ['bps%s_max_length', 'bps%s-max-length', 0],
        ['iops%s', 'iops%s', 0],
        ['iops%s_max', 'iops%s-max', 0],
        ['iops%s_max_length', 'iops%s-max-length', 0],
    );
    for my $direction (['', '-total'], ['_rd', '-read'], ['_wr', '-write']) {
        my ($conf_suffix, $qmp_suffix) = @$direction;
        for my $limit (@throttle_limits) {
            my ($conf_tmpl, $qmp_tmpl, $is_mbps) = @$limit;

            my $value = $drive->{ sprintf($conf_tmpl, $conf_suffix) } or next;
            $value = int($value * 1024 * 1024) if $is_mbps;

            $opts .= ",throttling." . sprintf($qmp_tmpl, $qmp_suffix) . "=$value";
        }
    }

    if ($pbs_name) {
        $format = "rbd" if $is_rbd;
        if (!$format) {
            die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n";
        }
        $opts .= ",format=alloc-track,file.driver=$format";
    } elsif ($format) {
        $opts .= ",format=$format";
    }

    my $cache_direct = drive_uses_cache_direct($drive, $scfg);

    $opts .= ",cache=none" if !$drive->{cache} && $cache_direct;

    if (!$drive->{aio}) {
        if ($io_uring && storage_allows_io_uring_default($scfg, $cache_direct)) {
            # io_uring supports all cache modes
            $opts .= ",aio=io_uring";
        } elsif ($cache_direct) {
            # aio native works only with O_DIRECT
            $opts .= ",aio=native";
        } else {
            $opts .= ",aio=threads";
        }
    }

    if (!drive_is_cdrom($drive)) {
        my $detectzeroes;
        if (defined($drive->{detect_zeroes}) && !$drive->{detect_zeroes}) {
            $detectzeroes = 'off';
        } elsif ($drive->{discard}) {
            $detectzeroes = $drive->{discard} eq 'on' ? 'unmap' : 'on';
        } else {
            # This used to be our default with discard not being specified:
            $detectzeroes = 'on';
        }

        # note: 'detect-zeroes' works per blockdev and we want it to persist
        # after the alloc-track is removed, so put it on 'file' directly
        my $dz_param = $pbs_name ? "file.detect-zeroes" : "detect-zeroes";
        $opts .= ",$dz_param=$detectzeroes" if $detectzeroes;
    }

    my $file_param = "file";
    if ($pbs_name) {
        $opts .= ",backing=$pbs_name";
        $opts .= ",auto-remove=on";

        # non-rbd drivers require the underlying file to be a separate block
        # node, so add a second .file indirection
        $file_param .= ".file" if !$is_rbd;
        $file_param .= ".filename";
    }

    my $pathinfo = $path ? "$file_param=$path," : '';

    return "${pathinfo}if=none,id=drive-$drive_id$opts";
}
2021-03-03 10:56:08 +01:00
# Build the QEMU blockdev property string for a read-only 'pbs' driver node.
#
# $pbs_conf provides repository, snapshot and archive, and optionally namespace
# and keyfile. $pbs_name becomes the node-name.
sub print_pbs_blockdev {
    my ($pbs_conf, $pbs_name) = @_;

    my @props = (
        "driver=pbs",
        "node-name=$pbs_name",
        "read-only=on",
        "repository=$pbs_conf->{repository}",
    );
    push @props, "namespace=$pbs_conf->{namespace}" if $pbs_conf->{namespace};
    push @props, "snapshot=$pbs_conf->{snapshot}";
    push @props, "archive=$pbs_conf->{archive}";
    push @props, "keyfile=$pbs_conf->{keyfile}" if $pbs_conf->{keyfile};

    return join(',', @props);
}
2012-01-28 11:02:28 +01:00
# Build the device property string for the NIC described by $net.
#
# The 'virtio' model is mapped to 'virtio-net-pci', every other model name is used as the
# device name directly. Multiqueue, queue size and host MTU properties are only added for
# the virtio model. With $use_old_bios_files a matching PXE romfile is appended for the
# device types listed in the lookup table below.
#
# Dies if a configured MTU is below 576 or above the MTU of the configured bridge.
# Returns the property string.
sub print_netdevice_full {
    my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files, $arch, $machine_type, $machine_version) = @_;

    my $is_virtio = $net->{model} eq 'virtio';
    my $device = $is_virtio ? 'virtio-net-pci' : $net->{model};

    my $pciaddr = print_pci_addr("$netid", $bridges, $arch, $machine_type);
    my $tmpstr = "$device,mac=$net->{macaddr},netdev=$netid$pciaddr,id=$netid";

    my $queues = $net->{queues};
    if ($is_virtio && $queues && $queues > 1) {
        # Consider we have N queues, the number of vectors needed is 2 * N + 2, i.e., one per in
        # and out of each queue plus one config interrupt and control vector queue
        my $vectors = $queues * 2 + 2;
        $tmpstr .= ",vectors=$vectors,mq=on";
        $tmpstr .= ",packed=on" if min_version($machine_version, 7, 1);
    }

    if (min_version($machine_version, 7, 1) && $is_virtio) {
        $tmpstr .= ",rx_queue_size=1024,tx_queue_size=1024";
    }

    $tmpstr .= ",bootindex=$net->{bootindex}" if $net->{bootindex};

    if (my $mtu = $net->{mtu}) {
        if ($is_virtio && $net->{bridge}) {
            my $bridge_mtu = PVE::Network::read_bridge_mtu($net->{bridge});
            if ($mtu == 1) {
                # a configured value of 1 selects the MTU of the bridge
                $mtu = $bridge_mtu;
            } else {
                die "netdev $netid: MTU '$mtu' is smaller than the IP minimum MTU '576'\n"
                    if $mtu < 576;
                die "netdev $netid: MTU '$mtu' is bigger than the bridge MTU '$bridge_mtu'\n"
                    if $mtu > $bridge_mtu;
            }
            $tmpstr .= ",host_mtu=$mtu";
        } else {
            warn "WARN: netdev $netid: ignoring MTU '$mtu', not using VirtIO or no bridge configured.\n";
        }
    }

    if ($use_old_bios_files) {
        my $romfile_for = {
            'virtio-net-pci' => 'pxe-virtio.rom',
            'e1000' => 'pxe-e1000.rom',
            'e1000e' => 'pxe-e1000e.rom',
            'ne2k' => 'pxe-ne2k_pci.rom',
            'pcnet' => 'pxe-pcnet.rom',
            'rtl8139' => 'pxe-rtl8139.rom',
        };
        my $romfile = $romfile_for->{$device};
        $tmpstr .= ",romfile=$romfile" if $romfile;
    }

    return $tmpstr;
}
# Build the netdev (host side backend) property string for network device $netid.
#
# $netid must have the form 'net<N>' with N below $MAX_NETS. The tap interface name is derived
# from $vmid and N. With a configured bridge a tap backend using the pve-bridge scripts is
# generated ($hotplug selects the hotplug variant of the up script), otherwise a user mode
# backend with the VM name as hostname is used.
#
# Dies on a malformed or out-of-range $netid and on a too long interface name.
# Returns the property string.
sub print_netdev_full {
    my ($vmid, $conf, $arch, $net, $netid, $hotplug) = @_;

    # reject anything that is not 'net<N>' up front - continuing with an empty index would
    # pass the numeric bound check below and yield a bogus 'tap<vmid>i' interface name
    my ($index) = $netid =~ m/^net(\d+)$/
        or die "got strange net id '$netid'\n";
    my $i = int($index);

    die "got strange net id '$i'\n" if $i >= $MAX_NETS;

    my $ifname = "tap${vmid}i$i";

    # kvm uses TUNSETIFF ioctl, and that limits ifname length
    die "interface name '$ifname' is too long (max 15 character)\n"
        if length($ifname) >= 16;

    my $vhostparam = '';
    if (is_native($arch)) {
        $vhostparam = ',vhost=on' if kernel_has_vhost_net() && $net->{model} eq 'virtio';
    }

    my $vmname = $conf->{name} || "vm$vmid";

    my $netdev = "";
    my $script = $hotplug ? "pve-bridge-hotplug" : "pve-bridge";

    if ($net->{bridge}) {
        $netdev = "type=tap,id=$netid,ifname=${ifname},script=/var/lib/qemu-server/$script"
            . ",downscript=/var/lib/qemu-server/pve-bridgedown$vhostparam";
    } else {
        $netdev = "type=user,id=$netid,hostname=$vmname";
    }

    $netdev .= ",queues=$net->{queues}" if ($net->{queues} && $net->{model} eq 'virtio');

    return $netdev;
}
2011-08-23 07:47:04 +02:00
2018-11-09 13:31:09 +01:00
# Maps the configured display type to the device model name used by print_vga_device().
my $vga_map = {
    'cirrus' => 'cirrus-vga',
    'std' => 'VGA',
    'vmware' => 'vmware-svga',
    'virtio' => 'virtio-vga',
    'virtio-gl' => 'virtio-vga-gl',
};

# Build the device property string for a display adapter.
#
# $vga     - parsed display config, 'type' and 'memory' are read
# $id      - optional index of an additional display, part of the device id ("vga$id")
# $qxlnum  - if true a qxl device is generated regardless of the mapped type
#
# Dies if no device type can be determined and, for 'virtio-gl', if the EGL/GL libraries or a
# DRM render node are missing on the host.
# Returns the property string.
sub print_vga_device {
    my ($conf, $vga, $arch, $machine_version, $machine, $id, $qxlnum, $bridges) = @_;

    my $type = $vga_map->{$vga->{type}};
    if ($arch eq 'aarch64' && defined($type) && $type eq 'virtio-vga') {
        $type = 'virtio-gpu';
    }
    my $vgamem_mb = $vga->{memory};

    my $max_outputs = '';
    if ($qxlnum) {
        $type = $id ? 'qxl' : 'qxl-vga';

        # both alternatives have to live in one group, else '^' would only anchor the first
        # and '$' only the second alternative
        if (!$conf->{ostype} || $conf->{ostype} =~ m/^(?:l\d\d|other)$/) {
            # set max outputs so linux can have up to 4 qxl displays with one device
            if (min_version($machine_version, 4, 1)) {
                $max_outputs = ",max_outputs=4";
            }
        }
    }

    die "no devicetype for $vga->{type}\n" if !$type;

    my $memory = "";
    if ($vgamem_mb) {
        if ($vga->{type} =~ /^virtio/) {
            my $bytes = PVE::Tools::convert_size($vgamem_mb, "mb" => "b");
            $memory = ",max_hostmem=$bytes";
        } elsif ($qxlnum) {
            # from https://www.spice-space.org/multiple-monitors.html
            $memory = ",vgamem_mb=$vgamem_mb";
            my $ram = $vgamem_mb * 4;
            my $vram = $vgamem_mb * 2;
            $memory .= ",ram_size_mb=$ram,vram_size_mb=$vram";
        } else {
            $memory = ",vgamem_mb=$vgamem_mb";
        }
    } elsif ($qxlnum && $id) {
        $memory = ",ram_size=67108864,vram_size=33554432";
    }

    my $edidoff = "";
    if ($type eq 'VGA' && windows_version($conf->{ostype})) {
        $edidoff = ",edid=off" if (!defined($conf->{bios}) || $conf->{bios} ne 'ovmf');
    }

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $vgaid = "vga" . ($id // '');
    my $pciaddr;
    if ($q35 && $vgaid eq 'vga') {
        # the first display uses pcie.0 bus on q35 machines
        $pciaddr = print_pcie_addr($vgaid, $bridges, $arch, $machine);
    } else {
        $pciaddr = print_pci_addr($vgaid, $bridges, $arch, $machine);
    }

    if ($vga->{type} eq 'virtio-gl') {
        my $base = '/usr/lib/x86_64-linux-gnu/lib';
        die "missing libraries for '$vga->{type}' detected! Please install 'libgl1' and 'libegl1'\n"
            if !-e "${base}EGL.so.1" || !-e "${base}GL.so.1";

        die "no DRM render node detected (/dev/dri/renderD*), no GPU? - needed for '$vga->{type}' display\n"
            if !PVE::Tools::dir_glob_regex('/dev/dri/', "renderD.*");
    }

    return "$type,id=${vgaid}${memory}${max_outputs}${pciaddr}${edidoff}";
}
2016-03-30 12:20:10 +02:00
# Parse a ';' separated list of ids and id ranges, e.g. "0-1;4".
#
# Every element has to be '<num>' or '<from>-<to>', optionally surrounded by whitespace.
# Returns an array reference of [ from, to ] pairs, 'to' is undef for a single id.
# Dies on a malformed element or if the end of a range is smaller than its start.
sub parse_number_sets {
    my ($set) = @_;

    my @ranges = map {
        my $part = $_;
        my ($from, $to) = $part =~ /^\s*(\d+)(?:-(\d+))?\s*$/
            or die "invalid range: $part\n";
        die "invalid range: $part ($to < $from)\n" if defined($to) && $to < $from;
        [ $from, $to ];
    } split(/;/, $set);

    return \@ranges;
}
add custom numa topology support
numaX: cpus=<id[-id]>,memory=<mb>[[,hostnodes=<id[-id]>][,policy=<preferred|bind|interleave>]]
example:
-------
sockets:4
cores:2
memory:4096
numa: 1
numa0: cpus=0-1,memory=1024,hostnodes=0-1,policy=interleave
numa1: cpus=2-3,memory=3072,hostnodes=2,policy=bind
qemu command line
-----------------
-object memory-backend-ram,size=1024M,policy=interleave,host-nodes=0-1,id=ram-node0
-numa node,nodeid=0,cpus=0-1,memdev=ram-node0
-object memory-backend-ram,size=3072M,policy=bind,host-nodes=2,id=ram-node1
-numa node,nodeid=1,cpus=2-3,memdev=ram-node1
Signed-off-by: Alexandre Derumier <aderumier@odiso.com>
2014-12-03 16:23:48 +01:00
2016-03-30 12:20:10 +02:00
# Parse a numa property string according to $numa_fmt.
#
# The 'cpus' and 'hostnodes' values, if present, are replaced by the range lists returned from
# parse_number_sets(). Errors from either parser are propagated to the caller.
sub parse_numa {
    my ($data) = @_;

    my $res = parse_property_string($numa_fmt, $data);

    for my $key (qw(cpus hostnodes)) {
        next if !defined($res->{$key});
        $res->{$key} = parse_number_sets($res->{$key});
    }

    return $res;
}
2011-08-23 07:47:04 +02:00
# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
#
# Parse a network device property string according to $net_fmt. A parse error is only
# warned about and results in an empty return. If the config carries no MAC address a random
# one, using the 'mac_prefix' from datacenter.cfg, is filled in unless $disable_mac_autogen
# is set.
sub parse_net {
    my ($data, $disable_mac_autogen) = @_;

    my $res = eval { parse_property_string($net_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    return $res if defined($res->{macaddr}) || $disable_mac_autogen;

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
    $res->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix});

    return $res;
}
# ipconfigX ip=cidr,gw=ip,ip6=cidr,gw6=ip
#
# Parse an ipconfig property string according to $ipconfig_fmt. Parse errors and inconsistent
# gateway/address combinations are warned about and result in an empty return. If neither an
# IPv4 nor an IPv6 address is configured, DHCP is returned for both.
sub parse_ipconfig {
    my ($data) = @_;

    my $res = eval { parse_property_string($ipconfig_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    my $has_gw = $res->{gw};
    my $has_gw6 = $res->{gw6};

    # the checks are kept in this order, the first failing one decides about the warning
    if ($has_gw && !$res->{ip}) {
        warn 'gateway specified without specifying an IP address';
        return;
    }
    if ($has_gw6 && !$res->{ip6}) {
        warn 'IPv6 gateway specified without specifying an IPv6 address';
        return;
    }
    if ($has_gw && $res->{ip} eq 'dhcp') {
        warn 'gateway specified together with DHCP';
        return;
    }
    if ($has_gw6 && $res->{ip6} !~ /^$IPV6RE/) {
        # gw6 + auto/dhcp
        warn "IPv6 gateway specified together with $res->{ip6} address";
        return;
    }

    return { ip => 'dhcp', ip6 => 'dhcp' } if !($res->{ip} || $res->{ip6});

    return $res;
}
# Serialize a parsed network device config back into its $net_fmt property string.
sub print_net {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $net_fmt);
}
# Re-write all 'net<N>' entries of $settings in place by passing them through parse_net() and
# print_net(), which lets parse_net() fill in a MAC address where none is configured.
# Entries that fail to parse are left untouched.
sub add_random_macs {
    my ($settings) = @_;

    for my $opt (grep { m/^net(\d+)$/ } keys %$settings) {
        my $net = parse_net($settings->{$opt}) or next;
        $settings->{$opt} = print_net($net);
    }
}
2014-11-17 07:08:44 +01:00
# Check whether the volume $volid is owned by the guest $vmid.
#
# Values starting with '/' are never considered owned. Errors while resolving the volume via
# PVE::Storage::path() are ignored on purpose and treated like "not owned".
# Returns 1 if the owner reported by the storage layer equals $vmid, nothing otherwise.
sub vm_is_volid_owner {
    my ($storecfg, $vmid, $volid) = @_;

    return if $volid =~ m|^/|;

    my (undef, $owner) = eval { PVE::Storage::path($storecfg, $volid) };

    return 1 if $owner && $owner == $vmid;
    return;
}
# Handle the volume of a drive that got removed from the config.
#
# The volume of a cloudinit drive is freed right away (errors are only warned about) and the
# 'cloudinit' entry is dropped from $conf. For every other drive that is not a CD-ROM, the
# volume is recorded as unused volume in $conf if it is owned by $vmid.
sub vmconfig_register_unused_drive {
    my ($storecfg, $vmid, $conf, $drive) = @_;

    if (drive_is_cloudinit($drive)) {
        eval { PVE::Storage::vdisk_free($storecfg, $drive->{file}) };
        if (my $err = $@) {
            warn $err;
        }
        delete $conf->{cloudinit};
    } elsif (!drive_is_cdrom($drive)) {
        my $volid = $drive->{file};
        PVE::QemuConfig->add_unused_volume($conf, $volid, $vmid)
            if vm_is_volid_owner($storecfg, $vmid, $volid);
    }
}
2019-06-11 12:13:52 +02:00
# smbios: [manufacturer=str][,product=str][,version=str][,serial=str][,uuid=uuid][,sku=str][,family=str][,base64=bool]
my $smbios1_fmt = {
    uuid => {
        type => 'string',
        pattern => '[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}',
        format_description => 'UUID',
        description => "Set SMBIOS1 UUID.",
        optional => 1,
    },
    base64 => {
        type => 'boolean',
        description => 'Flag to indicate that the SMBIOS values are base64 encoded',
        optional => 1,
    },
};
{
    # all free text fields share the very same schema, only their description differs
    my %label_of = (
        version => 'version',
        serial => 'serial number',
        manufacturer => 'manufacturer',
        product => 'product ID',
        sku => 'SKU string',
        family => 'family string',
    );
    for my $field (sort keys %label_of) {
        $smbios1_fmt->{$field} = {
            type => 'string',
            pattern => '[A-Za-z0-9+\/]+={0,2}',
            format_description => 'Base64 encoded string',
            description => "Set SMBIOS1 $label_of{$field}.",
            optional => 1,
        };
    }
}
# Parse an smbios1 property string according to $smbios1_fmt.
# A parse error is only warned about, the result is undefined in that case.
sub parse_smbios1 {
    my ($data) = @_;

    my $res = eval { parse_property_string($smbios1_fmt, $data) };
    if (my $err = $@) {
        warn $err;
    }

    return $res;
}
2014-08-26 09:20:09 +02:00
# Serialize a parsed smbios1 hash back into its $smbios1_fmt property string.
sub print_smbios1 {
    my $smbios1 = shift;

    return PVE::JSONSchema::print_property_string($smbios1, $smbios1_fmt);
}

# make the format available to schema definitions under its name
PVE::JSONSchema::register_format('pve-qm-smbios1', $smbios1_fmt);
2014-06-26 11:12:25 +02:00
2011-09-08 11:39:56 +02:00
# Parse a watchdog property string according to $watchdog_fmt.
# Returns nothing for a false $value. A parse error is only warned about, the result is
# undefined in that case.
sub parse_watchdog {
    my ($value) = @_;

    $value or return;

    my $res = eval { parse_property_string($watchdog_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $res;
}
2018-08-01 20:29:04 +02:00
# Parse the 'agent' property of $conf according to $agent_fmt.
#
# Returns an empty hash if no agent is configured or if it is not enabled, the parsed
# properties otherwise. A parse error is only warned about.
sub parse_guest_agent {
    my ($conf) = @_;

    my $agent_conf = $conf->{agent};
    return {} if !defined($agent_conf);

    my $res = eval { parse_property_string($agent_fmt, $agent_conf) };
    if (my $err = $@) {
        warn $err;
    }

    # if the agent is disabled ignore the other potentially set properties
    return $res->{enabled} ? $res : {};
}
2020-11-20 12:39:45 +01:00
# Look up a single property $key of the guest agent config in $conf.
# Returns undef if no agent is configured; for a configured agent the value found in the
# result of parse_guest_agent() is returned.
sub get_qga_key {
    my ($conf, $key) = @_;

    if (defined($conf->{agent})) {
        return parse_guest_agent($conf)->{$key};
    }

    # explicit undef, so that both paths hand back exactly one value
    return undef;
}
2018-11-09 13:31:09 +01:00
# Parse a vga property string according to $vga_fmt.
# Returns an empty hash for a false $value. A parse error is only warned about, the result is
# undefined in that case.
sub parse_vga {
    my ($value) = @_;

    $value or return {};

    my $res = eval { parse_property_string($vga_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $res;
}
fix #2264: add virtio-rng device
Allow a user to add a virtio-rng-pci (an emulated hardware random
number generator) to a VM with the rng0 setting. The setting is
version_guard()-ed.
Limit the selection of entropy source to one of three:
/dev/urandom (preferred): Non-blocking kernel entropy source
/dev/random: Blocking kernel source
/dev/hwrng: Hardware RNG on the host for passthrough
QEMU itself defaults to /dev/urandom (or the equivalent getrandom()
call) if no source file is given, but I don't fully trust that
behaviour to stay constant, considering the documentation [0] already
disagrees with the code [1], so let's always specify the file ourselves.
/dev/urandom is preferred, since it prevents host entropy starvation.
The quality of randomness is still good enough to emulate a hwrng, since
a) it's still seeded from the kernel's true entropy pool periodically
and b) it's mixed with true entropy in the guest as well.
Additionally, all sources about entropy prediction attacks I could find
mention that to predict /dev/urandom results, /dev/random has to be
accessed or manipulated in one way or the other - this is not possible
from a VM however, as the entropy we're talking about comes from the
*hosts* blocking pool.
More about the entropy and security implications of the non-blocking
interface in [2] and [3].
Note further that only one /dev/hwrng exists at any given time, if
multiple RNGs are available, only the one selected in
'/sys/devices/virtual/misc/hw_random/rng_current' will feed the file.
Selecting this is left as an exercise to the user, if at all required.
We limit the available entropy to 1 KiB/s by default, but allow the user
to override this. Interesting to note is that the limiter does not work
linearly, i.e. max_bytes=1024/period=1000 means that up to 1 KiB of data
becomes available on a 1000 millisecond timer, not that 1 KiB is
streamed to the guest over the course of one second - hence the
configurable period.
The default used here is the same as given in the QEMU documentation [0]
and has been verified to affect entropy availability in a guest by
measuring /dev/random throughput. 1 KiB/s is enough to avoid any
early-boot entropy shortages, and already has a significant impact on
/dev/random availability in the guest.
[0] https://wiki.qemu.org/Features/VirtIORNG
[1] https://git.qemu.org/?p=qemu.git;a=blob;f=crypto/random-platform.c;h=f92f96987d7d262047c7604b169a7fdf11236107;hb=HEAD
[2] https://lwn.net/Articles/261804/
[3] https://lwn.net/Articles/808575/
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
2020-02-20 18:10:44 +01:00
# Parse an rng property string according to $rng_fmt.
# Returns nothing for a false $value. A parse error is only warned about, the result is
# undefined in that case.
sub parse_rng {
    my ($value) = @_;

    $value or return;

    my $res = eval { parse_property_string($rng_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $res;
}
2021-10-21 09:10:49 +02:00
# Parse a meta property string according to $meta_info_fmt.
# Returns nothing for a false $value. A parse error is only warned about, the result is
# undefined in that case.
sub parse_meta_info {
    my ($value) = @_;

    $value or return;

    my $res = eval { parse_property_string($meta_info_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $res;
}
# Generate a fresh meta property string carrying the result of kvm_user_version() as
# 'creation-qemu' and the current epoch as 'ctime'.
# Arguments are ignored, for now overriding any value is not allowed.
sub new_meta_info_string {
    my $meta = {
        'creation-qemu' => kvm_user_version(),
        ctime => "" . int(time()),
    };

    return PVE::JSONSchema::print_property_string($meta, $meta_info_fmt);
}
2021-10-21 09:51:22 +02:00
# Check if we need to apply some handling for VMs that always use the latest machine version but
# had a machine version transition happen that affected HW such that, e.g., an OS config change
# would be required (we do not want to pin machine version for non-windows OS type).
#
# Returns an array reference with extra command line arguments, or nothing if no fixup applies.
sub qemu_created_version_fixups {
    my ($conf, $forcemachine, $kvmver) = @_;

    my $meta = parse_meta_info($conf->{meta}) // {};
    my $forced_vers = PVE::QemuServer::Machine::extract_version($forcemachine);

    # only non-versioned machine types are of interest
    my $machine = $conf->{machine};
    return if defined($machine) && $machine !~ m/^(?:pc|q35|virt)$/;

    # nothing to do for guests created with 6.1 or newer
    my $created_with = $meta->{'creation-qemu'};
    return if defined($created_with) && min_version($created_with, 6, 1);

    # handle snapshot-rollback/migrations
    return if $forced_vers && !min_version($forced_vers, 6, 1);

    # only need to apply the change since 6.1
    return if !min_version($kvmver, 6, 1);

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    if ($q35 && $conf->{ostype} && $conf->{ostype} eq 'l26') {
        # this changed to default-on in Q 6.1 for q35 machines, it will mess with PCI slot view
        # and thus with the predictable interface naming of systemd
        return ['-global', 'ICH9-LPC.acpi-pci-hotplug-with-bridge-support=off'];
    }

    return;
}
2011-08-23 07:47:04 +02:00
PVE::JSONSchema::register_format('pve-qm-usb-device', \&verify_usb_device);

# Format verifier for 'pve-qm-usb-device'.
# Returns $value if parse_usb_device() accepts it. Otherwise returns nothing when $noerr is
# set and dies when it is not.
sub verify_usb_device {
    my ($value, $noerr) = @_;

    if (!parse_usb_device($value)) {
        return if $noerr;
        die "unable to parse usb device\n";
    }

    return $value;
}
# add JSON properties for create and set function
#
# Copies the descriptions of all $confdesc options, except the ones listed in the skip table
# below, into $prop. With $with_disk_alloc the drive options take their description from
# $PVE::QemuServer::Drive::drivedesc_hash_with_alloc instead.
# Returns $prop.
sub json_config_properties {
    my ($prop, $with_disk_alloc) = @_;

    my %skip = map { $_ => 1 } qw(parent snaptime vmstate runningmachine runningcpu meta);

    for my $opt (grep { !$skip{$_} } keys %$confdesc) {
        my $use_alloc_desc = $with_disk_alloc && is_valid_drivename($opt);
        $prop->{$opt} = $use_alloc_desc
            ? $PVE::QemuServer::Drive::drivedesc_hash_with_alloc->{$opt}
            : $confdesc->{$opt};
    }

    return $prop;
}
2022-03-17 12:31:03 +01:00
# Properties that we can read from an OVF file
#
# Returns a hash reference with one optional disk image property per valid drive name plus
# the optional 'cores', 'memory' and 'name' properties.
sub json_ovf_properties {
    my %disk_props = map {
        my $device = $_;
        (
            $device => {
                type => 'string',
                format => 'pve-volume-id-or-absolute-path',
                description => "Disk image that gets imported to $device",
                optional => 1,
            }
        );
    } PVE::QemuServer::Drive::valid_drive_names();

    return {
        %disk_props,
        cores => {
            type => 'integer',
            description => "The number of CPU cores.",
            optional => 1,
        },
        memory => {
            type => 'integer',
            description => "Amount of RAM for the VM in MB.",
            optional => 1,
        },
        name => {
            type => 'string',
            description => "Name of the VM.",
            optional => 1,
        },
    };
}
2018-03-07 09:26:33 +01:00
# Hand out a deep copy of $confdesc_cloudinit (used to generate documentation),
# so that callers cannot modify the original description.
sub cloudinit_config_properties {
    return dclone($confdesc_cloudinit);
}
2022-11-16 18:14:06 +01:00
# Build a lookup hash (key => 1) containing every key of $confdesc_cloudinit,
# 'name', and 'net0' .. 'net<$MAX_NETS - 1>'. Returns a hash reference.
sub cloudinit_pending_properties {
    my %pending = (name => 1);

    $pending{$_} = 1 for keys %$confdesc_cloudinit;

    for my $idx (0 .. $MAX_NETS - 1) {
        $pending{"net$idx"} = 1;
    }

    return \%pending;
}
2011-08-23 07:47:04 +02:00
# Validate $value against the type that $confdesc declares for $key and return
# the normalized value.
#
# - boolean: 1/on/yes/true => 1, 0/off/no/false => 0 (case-insensitive)
# - integer: digits only, returned via int()
# - number:  digits with an optional fractional part, returned unchanged
# - string:  checked with PVE::JSONSchema::check_format() when the option has a
#            format; otherwise one pair of enclosing double quotes is stripped
#
# Dies for unknown keys, undefined values, values containing CR/LF, and values
# that fail the type check.
sub check_type {
    my ($key, $value) = @_;

    my $desc = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$desc;

    my $type = $confdesc->{$key}->{type};

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        # the value cannot contain a newline here, so '$' only matches at the end
        return 1 if $value =~ m/^(?:1|on|yes|true)$/i;
        return 0 if $value =~ m/^(?:0|off|no|false)$/i;
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    if ($type eq 'string') {
        my $fmt = $confdesc->{$key}->{format};
        if ($fmt) {
            PVE::JSONSchema::check_format($fmt, $value);
        } else {
            $value =~ s/^\"(.*)\"$/$1/;
        }
        return $value;
    }

    die "internal error"
}
# Destroy a VM: free the volumes it owns, then delete or replace its config.
#
# $storecfg           - storage configuration handed to the PVE::Storage helpers
# $vmid               - ID of the VM
# $skiplock           - when true, the config lock check is skipped
# $replacement_conf   - when defined, written as the new config instead of
#                       destroying the config file
# $purge_unreferenced - when true, also free every 'images' volume that
#                       vdisk_list() reports for $vmid
#
# Dies if the VM is a template and one of its base volumes is still in use.
# Failures while freeing individual volumes are only warned about.
sub destroy_vm {
    my ($storecfg, $vmid, $skiplock, $replacement_conf, $purge_unreferenced) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    # the lock check is skipped on request, or when the lock is 'suspended'
    PVE::QemuConfig->check_lock($conf)
        if !($skiplock || PVE::QemuConfig->has_lock($conf, 'suspended'));

    if ($conf->{template}) {
        # check if any base image is still used by a linked clone
        my $assert_base_unused = sub {
            my ($ds, $drive) = @_;
            return if drive_is_cdrom($drive);

            my $volid = $drive->{file};
            return if !$volid || $volid =~ m|^/|;

            die "base volume '$volid' is still in use by linked cloned\n"
                if PVE::Storage::volume_is_base_and_used($storecfg, $volid);
        };
        PVE::QemuConfig->foreach_volume_full(
            $conf, { include_unused => 1 }, $assert_base_unused);
    }

    # volume IDs for which a removal was already attempted
    my $handled_volids = {};

    my $free_owned_volume = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|; # nothing set, or an absolute path
        return if $handled_volids->{$volid};

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        $handled_volids->{$volid} = 1;
        eval { PVE::Storage::vdisk_free($storecfg, $volid) };
        warn "Could not remove disk '$volid', check manually: $@" if $@;
    };

    # only remove disks owned by this VM (referenced in the config)
    my $volume_opts = {
        include_unused => 1,
        extra_keys => ['vmstate'],
    };

    PVE::QemuConfig->foreach_volume_full($conf, $volume_opts, $free_owned_volume);

    # saved VM state volumes referenced by snapshots
    for my $snapshot (values %{$conf->{snapshots}}) {
        next if !defined($snapshot->{vmstate});

        my $state_drive = PVE::QemuConfig->parse_volume('vmstate', $snapshot->{vmstate}, 1);
        next if !defined($state_drive);

        $free_owned_volume->('vmstate', $state_drive);
    }

    PVE::QemuConfig->foreach_volume_full($conf->{pending}, $volume_opts, $free_owned_volume);

    if ($purge_unreferenced) { # also remove unreferenced disk
        my $vmdisks = PVE::Storage::vdisk_list($storecfg, undef, $vmid, undef, 'images');
        PVE::Storage::foreach_volid($vmdisks, sub {
            my ($volid, $sid, $volname, $d) = @_;
            eval { PVE::Storage::vdisk_free($storecfg, $volid) };
            warn $@ if $@;
        });
    }

    if (defined $replacement_conf) {
        PVE::QemuConfig->write_config($vmid, $replacement_conf);
    } else {
        PVE::QemuConfig->destroy_config($vmid);
    }
}
# Parse the raw text of a VM configuration file into a nested hash.
#
# $filename - path of the config file; must end in /qemu-server/<vmid>.conf,
#             the VM ID is taken from it (dies otherwise)
# $raw      - file content; nothing is returned when it is undefined
# $strict   - when true, parse problems die; otherwise they are only warned
#
# The returned hash holds the top-level options plus:
#   digest    - SHA1 hex digest of $raw
#   pending   - options of the [PENDING] section
#   cloudinit - options of the [special:cloudinit] section (stored unvalidated)
#   snapshots - one hash per [<name>] section
# Comment lines and 'description:' lines are collected into the 'description'
# of the section they appear in.
sub parse_vm_config {
    my ($filename, $raw, $strict) = @_;

    return if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
        pending => {},
        cloudinit => {},
    };

    my $handle_error = sub {
        my ($msg) = @_;

        if ($strict) {
            die $msg;
        } else {
            warn $msg;
        }
    };

    $filename =~ m|/qemu-server/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    # $conf always points at the section that is currently being filled
    my $conf = $res;
    my $descr;

    # store the description collected so far in the current section and reset it
    my $finish_description = sub {
        if (defined($descr)) {
            $descr =~ s/\s+$//;
            $conf->{description} = $descr;
        }
        $descr = undef;
    };
    my $section = '';

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        # section headers: flush the description into the old section first,
        # then switch $conf to the new one
        if ($line =~ m/^\[PENDING\]\s*$/i) {
            $section = 'pending';
            $finish_description->();
            $conf = $res->{$section} = {};
            next;
        } elsif ($line =~ m/^\[special:cloudinit\]\s*$/i) {
            $section = 'cloudinit';
            $finish_description->();
            $conf = $res->{$section} = {};
            next;
        } elsif ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            $section = $1;
            $finish_description->();
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # Anchored at the start of the line: without the anchor any line
            # merely ending in "snapstate: prepare|delete" (e.g. an 'args:'
            # line, which is tested later) was taken for a snapstate and its
            # real key/value was dropped.
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^(args):\s*(.*\S)\s*$/) {
            # 'args' is stored verbatim, without type checking
            my $key = $1;
            my $value = $2;
            $conf->{$key} = $value;
        } elsif ($line =~ m/^delete:\s*(.*\S)\s*$/) {
            my $value = $1;
            if ($section eq 'pending') {
                $conf->{delete} = $value; # we parse this later
            } else {
                $handle_error->("vm $vmid - property 'delete' is only allowed in [PENDING]\n");
            }
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(.+?)\s*$/) {
            my $key = $1;
            my $value = $2;

            if ($section eq 'cloudinit') {
                # ignore validation only used for informative purpose
                $conf->{$key} = $value;
                next;
            }

            eval { $value = check_type($key, $value); };
            if ($@) {
                $handle_error->("vm $vmid - unable to parse value of '$key' - $@");
            } else {
                # 'cdrom' is stored under 'ide2'
                $key = 'ide2' if $key eq 'cdrom';
                my $fmt = $confdesc->{$key}->{format};
                if ($fmt && $fmt =~ /^pve-qm-(?:ide|scsi|virtio|sata)$/) {
                    # rewrite the drive's file entry to a volume ID
                    my $v = parse_drive($key, $value);
                    if (my $volid = filename_to_volume_id($vmid, $v->{file}, $v->{media})) {
                        $v->{file} = $volid;
                        $value = print_drive($v);
                    } else {
                        $handle_error->("vm $vmid - unable to parse value of '$key'\n");
                        next;
                    }
                }

                $conf->{$key} = $value;
            }
        } else {
            $handle_error->("vm $vmid - unable to parse config: $line\n");
        }
    }

    $finish_description->();
    delete $res->{snapstate}; # just to be sure

    return $res;
}
2012-02-02 14:01:08 +01:00
# Serialize a VM configuration hash into the text form of a config file.
#
# $filename - not used by this function
# $conf     - configuration hash; it is normalized in place ('cdrom' becomes
#             'ide2', 'smp' becomes 'sockets', values are replaced by their
#             check_type() result, 'unusedX' entries whose volume is in use
#             again are removed)
#
# Returns the raw text: the main section, then [PENDING] and
# [special:cloudinit] when present, then one section per snapshot sorted by
# name. Dies on conflicting or invalid options.
sub write_vm_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    if (my $cdrom = $conf->{cdrom}) {
        die "option ide2 conflicts with cdrom\n" if $conf->{ide2};
        $conf->{ide2} = $cdrom;
        delete $conf->{cdrom};
    }

    # we do not use 'smp' any longer
    if ($conf->{sockets}) {
        delete $conf->{smp};
    } elsif ($conf->{smp}) {
        $conf->{sockets} = $conf->{smp};
        delete $conf->{cores};
        delete $conf->{smp};
    }

    # volume IDs referenced by drives outside of snapshots
    my $used_volids = {};

    # keys that are not validated as options
    my %unchecked_key = map { $_ => 1 }
        qw(digest description snapshots snapstate pending cloudinit);

    # validate and normalize every option of one section in place
    my $cleanup_config = sub {
        my ($cref, $pending, $snapname) = @_;

        for my $key (keys %$cref) {
            next if $unchecked_key{$key};

            if ($key eq 'delete') {
                die "propertry 'delete' is only allowed in [PENDING]\n"
                    if !$pending;
                # fixme: check syntax?
                next;
            }

            my $value = $cref->{$key};
            eval { $value = check_type($key, $value); };
            die "unable to parse value of '$key' - $@" if $@;

            $cref->{$key} = $value;

            if (!$snapname && is_valid_drivename($key)) {
                my $drive = parse_drive($key, $value);
                $used_volids->{$drive->{file}} = 1 if $drive && $drive->{file};
            }
        }
    };

    $cleanup_config->($conf);
    $cleanup_config->($conf->{pending}, 1);

    for my $snapname (keys %{$conf->{snapshots}}) {
        die "internal error: snapshot name '$snapname' is forbidden" if lc($snapname) eq 'pending';
        $cleanup_config->($conf->{snapshots}->{$snapname}, undef, $snapname);
    }

    # remove 'unusedX' settings if we re-add a volume
    for my $key (keys %$conf) {
        next if !($key =~ m/^unused/ && $used_volids->{$conf->{$key}});
        delete $conf->{$key};
    }

    # render one section: description as leading '#' comment lines, then the
    # options sorted by key
    my $generate_raw_config = sub {
        my ($section_conf, $pending) = @_;

        my $text = '';

        # add description as comment to top of file
        my $descr = $section_conf->{description};
        if (defined($descr)) {
            if ($descr) {
                $text .= '#' . PVE::Tools::encode_text($_) . "\n" for split(/\n/, $descr);
            } else {
                $text .= "#\n" if $pending;
            }
        }

        for my $key (sort keys %$section_conf) {
            next if $key =~ /^(digest|description|pending|cloudinit|snapshots)$/;
            $text .= "$key: $section_conf->{$key}\n";
        }

        return $text;
    };

    my $raw = $generate_raw_config->($conf);

    if (scalar(keys %{$conf->{pending}})) {
        $raw .= "\n[PENDING]\n";
        $raw .= $generate_raw_config->($conf->{pending}, 1);
    }

    if (scalar(keys %{$conf->{cloudinit}}) && PVE::QemuConfig->has_cloudinit($conf)) {
        $raw .= "\n[special:cloudinit]\n";
        $raw .= $generate_raw_config->($conf->{cloudinit});
    }

    for my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n";
        $raw .= $generate_raw_config->($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
2011-08-23 07:47:04 +02:00
2011-09-12 12:26:00 +02:00
# Collect the static defaults declared in our JSON schema configuration
# ($confdesc). Returns a hash reference mapping each option that has a defined
# 'default' to that default value.
sub load_defaults {
    my %defaults;

    for my $opt (keys %$confdesc) {
        my $value = $confdesc->{$opt}->{default};
        next if !defined($value);
        $defaults{$opt} = $value;
    }

    return \%defaults;
}
# List the qemu guests that the cluster VM list assigns to the local node.
#
# Returns a hash reference { $vmid => { exists => 1 }, ... }; it is empty when
# no VM list (or no 'ids' entry in it) is available.
sub config_list {
    my $vmlist = PVE::Cluster::get_vmlist();

    my $res = {};
    return $res if !($vmlist && $vmlist->{ids});

    my $ids = $vmlist->{ids};
    my $local_node = nodename();

    for my $vmid (keys %$ids) {
        my $entry = $ids->{$vmid};

        my $on_this_node = $entry->{node} && $entry->{node} eq $local_node;
        next if !$on_this_node;

        my $is_qemu = $entry->{type} && $entry->{type} eq 'qemu';
        next if !$is_qemu;

        $res->{$vmid}->{exists} = 1;
    }

    return $res;
}
2011-09-09 12:13:21 +02:00
# Test whether a VM uses local resources (to prevent migration).
#
# Returns an array reference with the names of the config keys that count as
# local resources. Dies with "VM uses local resources\n" when any were found
# and $noerr is not set.
sub check_local_resources {
    my ($conf, $noerr) = @_;

    my @found;

    # 'hostusb' and 'hostpci' without an index are the old syntax
    for my $plain_key (qw(hostusb hostpci ivshmem)) {
        push @found, $plain_key if $conf->{$plain_key};
    }

    for my $key (keys %$conf) {
        # usb entries whose value is 'spice' (optionally followed by ',...')
        # are not counted
        next if $key =~ m/^usb/ && ($conf->{$key} =~ m/^spice(?![^,])/);
        # sockets are safe: they will be recreated on the target side post-migrate
        next if $key =~ m/^serial/ && ($conf->{$key} eq 'socket');

        push @found, $key if $key =~ m/^(?:usb|hostpci|serial|parallel)\d+$/;
    }

    if (scalar(@found) && !$noerr) {
        die "VM uses local resources\n";
    }

    return \@found;
}
2013-05-06 08:56:17 +02:00
# Check that the storages used by the VM's volumes are available on both the
# local node and $node (used by migrate).
#
# For each volume with a storage ID, storage_check_enabled() is called for the
# local node and for $node. This function itself dies when the volume's content
# type is not listed in the storage's 'content' setting.
sub check_storage_availability {
    my ($storecfg, $conf, $node) = @_;

    my $verify_volume = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($sid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        # check if storage is available on both nodes
        my $scfg = PVE::Storage::storage_check_enabled($storecfg, $sid);
        PVE::Storage::storage_check_enabled($storecfg, $sid, $node);

        my ($vtype) = PVE::Storage::parse_volname($storecfg, $volid);

        die "$volid: content type '$vtype' is not available on storage '$sid'\n"
            if !$scfg->{content}->{$vtype};
    };

    PVE::QemuConfig->foreach_volume($conf, $verify_volume);
}
2013-05-06 08:56:17 +02:00
# List the nodes where all VM images are available (used by has_feature API).
#
# Starts with every cluster node and narrows the set per volume:
#  - a disabled storage removes all nodes
#  - a storage with a 'nodes' restriction removes the nodes not listed there
#  - a storage that is not shared removes every node but the local one
# Returns a hash reference { $node => 1, ... }.
sub shared_nodes {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => 1 } @$nodelist };
    my $local_node = nodename();

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        if ($scfg->{disable}) {
            $nodehash = {};
            return;
        }

        my @excluded;
        if (my $avail = $scfg->{nodes}) {
            @excluded = grep { !$avail->{$_} } keys %$nodehash;
        } elsif (!$scfg->{shared}) {
            @excluded = grep { $_ ne $local_node } keys %$nodehash;
        }
        delete $nodehash->{$_} for @excluded;
    });

    return $nodehash;
}
2019-06-28 15:13:45 +02:00
# Report, per cluster node, which storages used by the VM are unavailable there.
#
# A storage counts as unavailable on every node when it is disabled, and on the
# nodes missing from its 'nodes' restriction when it has one.
#
# Returns a hash reference keyed by node name. A node's entry is an empty hash,
# or { unavailable_storages => [ sorted storage IDs ] }.
sub check_local_storage_availability {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => {} } @$nodelist };

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        my @affected_nodes;
        if ($scfg->{disable}) {
            @affected_nodes = keys %$nodehash;
        } elsif (my $avail = $scfg->{nodes}) {
            @affected_nodes = grep { !$avail->{$_} } keys %$nodehash;
        }

        # collected as a hash first, so each storage is recorded once per node
        $nodehash->{$_}->{unavailable_storages}->{$storeid} = 1 for @affected_nodes;
    });

    # turn the per-node hashes into sorted lists
    for my $node_info (values %$nodehash) {
        my $unavail = $node_info->{unavailable_storages};
        next if !$unavail;
        $node_info->{unavailable_storages} = [ sort keys %$unavail ];
    }

    return $nodehash;
}
2019-11-19 12:23:46 +01:00
# Compat only, use assert_config_exists_on_node and vm_running_locally where possible.
#
# Unless $nocheck is set, first asserts that the config of $vmid exists on
# $node; then returns the result of vm_running_locally($vmid).
sub check_running {
    my ($vmid, $nocheck, $node) = @_;

    # $nocheck is set when called during a migration. In that case the config
    # file might still, or already, reside on the *other* node:
    # - the rename has already happened, and the current node is the source
    # - the rename hasn't happened yet, and the current node is the target
    # - the rename has happened and the current node is the target, but it
    #   hasn't processed it yet
    if (!$nocheck) {
        PVE::QemuConfig::assert_config_exists_on_node($vmid, $node);
    }

    return PVE::QemuServer::Helpers::vm_running_locally($vmid);
}
# Return the result of config_list(), with a 'pid' added to every guest that
# has a <vmid>.pid file in the run directory and for which check_running()
# returns a true value. If the run directory cannot be opened, the list is
# returned without any PIDs.
sub vzlist {
    my $vzlist = config_list();

    my $run_dir = IO::Dir->new($PVE::QemuServer::Helpers::var_run_tmpdir);
    return $vzlist if !$run_dir;

    while (defined(my $entry = $run_dir->read)) {
        if ($entry =~ m/^(\d+)\.pid$/) {
            my $vmid = $1;
            next if !defined($vzlist->{$vmid});

            my $pid = check_running($vmid);
            $vzlist->{$vmid}->{pid} = $pid if $pid;
        }
    }

    return $vzlist;
}
2018-08-01 12:55:29 +02:00
# JSON schema description of the per-VM status properties. Everything except
# 'vmid' and 'status' is marked optional.
our $vmstatus_return_properties = {
    # --- identity and state ---
    vmid => get_standard_option('pve-vmid'),
    status => {
        description => "QEMU process status.",
        type => 'string',
        enum => ['stopped', 'running'],
    },
    name => {
        description => "VM name.",
        type => 'string',
        optional => 1,
    },
    qmpstatus => {
        description => "QEMU QMP agent status.",
        type => 'string',
        optional => 1,
    },
    pid => {
        description => "PID of running qemu process.",
        type => 'integer',
        optional => 1,
    },
    uptime => {
        description => "Uptime.",
        type => 'integer',
        optional => 1,
        renderer => 'duration',
    },

    # --- resources ---
    maxmem => {
        description => "Maximum memory in bytes.",
        type => 'integer',
        optional => 1,
        renderer => 'bytes',
    },
    maxdisk => {
        description => "Root disk size in bytes.",
        type => 'integer',
        optional => 1,
        renderer => 'bytes',
    },
    cpus => {
        description => "Maximum usable CPUs.",
        type => 'number',
        optional => 1,
    },

    # --- configuration details ---
    lock => {
        description => "The current config lock, if any.",
        type => 'string',
        optional => 1,
    },
    tags => {
        description => "The current configured tags, if any",
        type => 'string',
        optional => 1,
    },

    # --- only available while running ---
    'running-machine' => {
        description => "The currently running machine type (if running).",
        type => 'string',
        optional => 1,
    },
    'running-qemu' => {
        description => "The currently running QEMU version (if running).",
        type => 'string',
        optional => 1,
    },
};
2011-08-23 07:47:04 +02:00
# CPU accounting sample of the previous vmstatus() call, keyed by PID:
# { time => ..., used => ..., cpu => ... }
my $last_proc_pid_stat;

# Get VM status information.
#
# $opt_vmid - when set, only this VM is reported
# $full     - when false, only cheap local sources are used; this must be fast
#             and should not block. When true, KVM is additionally queried
#             using QMP (this can be slow).
#
# Returns a hash reference keyed by VM ID.
sub vmstatus {
    my ($opt_vmid, $full) = @_;

    my $res = {};

    my $storecfg = PVE::Storage::config();

    my $list = vzlist();
    my $defaults = load_defaults();

    my ($uptime) = PVE::ProcFSTools::read_proc_uptime(1);

    my $cpucount = $cpuinfo->{cpus} || 1;

    my $mib = 1024 * 1024;

    # pass 1: values derived from the config
    for my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);

        my $conf = PVE::QemuConfig->load_config($vmid);

        my $listed_pid = $list->{$vmid}->{pid};

        my $d = { vmid => int($vmid) };
        $d->{pid} = int($listed_pid) if $listed_pid;

        # fixme: better status?
        $d->{status} = $listed_pid ? 'running' : 'stopped';

        my $size = PVE::QemuServer::Drive::bootdisk_size($storecfg, $conf);
        $d->{disk} = 0; # no info available
        $d->{maxdisk} = $size // 0;

        my $cpus = ($conf->{sockets} || $defaults->{sockets})
            * ($conf->{cores} || $defaults->{cores});
        $cpus = $cpucount if $cpus > $cpucount;
        $cpus = $conf->{vcpus} if $conf->{vcpus};
        $d->{cpus} = $cpus;

        $d->{name} = $conf->{name} || "VM $vmid";
        $d->{maxmem} = ($conf->{memory} || $defaults->{memory}) * $mib;

        if ($conf->{balloon}) {
            $d->{balloon_min} = $conf->{balloon} * $mib;
            $d->{shares} = $conf->{shares} // $defaults->{shares};
        }

        # counters start at zero and are filled in by the later passes
        $d->{$_} = 0 for qw(uptime cpu mem netout netin diskread diskwrite);

        $d->{template} = 1 if PVE::QemuConfig->is_template($conf);

        $d->{serial} = 1 if conf_has_serial($conf);
        $d->{lock} = $conf->{lock} if $conf->{lock};
        $d->{tags} = $conf->{tags} if defined($conf->{tags});

        $res->{$vmid} = $d;
    }

    # pass 2: network counters of the tap<vmid>i<n> devices
    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    for my $dev (keys %$netdev) {
        next if $dev !~ m/^tap([1-9]\d*)i/;
        my $vmid = $1;

        my $d = $res->{$vmid};
        next if !$d;

        my $counters = $netdev->{$dev};

        # the device's 'receive' counter is booked as the guest's 'netout',
        # its 'transmit' counter as 'netin'
        $d->{netout} += $counters->{receive};
        $d->{netin} += $counters->{transmit};

        if ($full) {
            $d->{nics}->{$dev}->{netout} = int($counters->{receive});
            $d->{nics}->{$dev}->{netin} = int($counters->{transmit});
        }
    }

    my $ctime = gettimeofday;

    # pass 3: uptime, memory and CPU usage from /proc
    for my $vmid (keys %$list) {
        my $d = $res->{$vmid};
        my $pid = $d->{pid};
        next if !$pid;

        my $pstat = PVE::ProcFSTools::read_proc_pid_stat($pid);
        next if !$pstat; # not running

        my $used = $pstat->{utime} + $pstat->{stime};

        $d->{uptime} = int(($uptime - $pstat->{starttime}) / $cpuinfo->{user_hz});

        if ($pstat->{vsize}) {
            $d->{mem} = int(($pstat->{rss} / $pstat->{vsize}) * $d->{maxmem});
        }

        my $previous = $last_proc_pid_stat->{$pid};
        if (!$previous) {
            # first sample for this PID: nothing to compare against yet
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        my $dtime = ($ctime - $previous->{time}) * $cpucount * $cpuinfo->{user_hz};

        if ($dtime > 1000) {
            my $dutime = $used - $previous->{used};

            $d->{cpu} = (($dutime / $dtime) * $cpucount) / $d->{cpus};
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => $d->{cpu},
            };
        } else {
            # interval too short: report the previous value again
            $d->{cpu} = $previous->{cpu};
        }
    }

    return $res if !$full;

    my $qmpclient = PVE::QMPClient->new();

    my $ballooncb = sub {
        my ($vmid, $resp) = @_;

        my $info = $resp->{'return'};
        return if !$info->{max_mem};

        my $d = $res->{$vmid};

        # use memory assigned to VM
        $d->{maxmem} = $info->{max_mem};
        $d->{balloon} = $info->{actual};

        if (defined($info->{total_mem}) && defined($info->{free_mem})) {
            $d->{mem} = $info->{total_mem} - $info->{free_mem};
            $d->{freemem} = $info->{free_mem};
        }

        $d->{ballooninfo} = $info;
    };

    my $blockstatscb = sub {
        my ($vmid, $resp) = @_;

        my $data = $resp->{'return'} || [];
        my $read_total = 0;
        my $written_total = 0;

        for my $blockstat (@$data) {
            $read_total += $blockstat->{stats}->{rd_bytes};
            $written_total += $blockstat->{stats}->{wr_bytes};

            $blockstat->{device} =~ s/drive-//;
            $res->{$vmid}->{blockstat}->{$blockstat->{device}} = $blockstat->{stats};
        }

        $res->{$vmid}->{diskread} = $read_total;
        $res->{$vmid}->{diskwrite} = $written_total;
    };

    my $machinecb = sub {
        my ($vmid, $resp) = @_;

        my $data = $resp->{'return'} || [];

        $res->{$vmid}->{'running-machine'} =
            PVE::QemuServer::Machine::current_from_query_machines($data);
    };

    my $versioncb = sub {
        my ($vmid, $resp) = @_;

        my $data = $resp->{'return'} // {};
        my $version = 'unknown';

        if (my $v = $data->{qemu}) {
            $version = $v->{major} . "." . $v->{minor} . "." . $v->{micro};
        }

        $res->{$vmid}->{'running-qemu'} = $version;
    };

    my $statuscb = sub {
        my ($vmid, $resp) = @_;

        $qmpclient->queue_cmd($vmid, $blockstatscb, 'query-blockstats');
        $qmpclient->queue_cmd($vmid, $machinecb, 'query-machines');
        $qmpclient->queue_cmd($vmid, $versioncb, 'query-version');
        # this fails if the balloon driver is not loaded, so this must be
        # the last command (following commands are aborted if this fails).
        $qmpclient->queue_cmd($vmid, $ballooncb, 'query-balloon');

        my $status = $resp->{'return'}->{status};
        if (!defined($status)) {
            warn "unable to get VM status\n";
            return;
        }

        $res->{$vmid}->{qmpstatus} = $status;
    };

    for my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        next if !$res->{$vmid}->{pid}; # not running
        $qmpclient->queue_cmd($vmid, $statuscb, 'query-status');
    }

    $qmpclient->queue_execute(undef, 2);

    for my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        next if !$res->{$vmid}->{pid}; # not running

        # we can't use the $qmpclient since it might have already aborted on
        # 'query-balloon', but this might also fail for older versions...
        my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };
        $res->{$vmid}->{'proxmox-support'} = $qemu_support // {};
    }

    # fall back to the process status where QMP did not deliver one
    for my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        $res->{$vmid}->{qmpstatus} ||= $res->{$vmid}->{status};
    }

    return $res;
}
2018-01-26 11:57:59 +01:00
# Return 1 if any of serial0 .. serial<$MAX_SERIAL_PORTS - 1> is set to a true
# value in $conf, 0 otherwise.
sub conf_has_serial {
    my ($conf) = @_;

    for my $port (0 .. $MAX_SERIAL_PORTS - 1) {
        return 1 if $conf->{"serial$port"};
    }

    return 0;
}
2019-07-23 18:09:32 +02:00
# Look up the audio device 'audio<$id>' ($id defaults to 0) in $conf.
#
# Returns nothing when the entry is not defined. Otherwise the property string
# is parsed with $audio_fmt and a hash reference is returned:
#   dev        - the configured 'device'
#   dev_id     - "audiodev<$id>"
#   backend    - the configured 'driver', 'spice' when none is set
#   backend_id - "<backend>-backend<$id>"
sub conf_has_audio {
    my ($conf, $id) = @_;

    $id //= 0;

    my $entry = $conf->{"audio$id"};
    return if !defined($entry);

    my $parsed = parse_property_string($audio_fmt, $entry);
    my $backend = $parsed->{driver} // 'spice';

    my %audio = (
        dev => $parsed->{device},
        dev_id => "audiodev$id",
        backend => $backend,
        backend_id => "${backend}-backend${id}",
    );

    return \%audio;
}
2020-04-17 15:20:05 +02:00
# Build the QEMU command line arguments for an audio device.
#
# $audio           - hash with 'dev', 'dev_id', 'backend' and 'backend_id'
# $audiopciaddr    - string appended to the -device argument of the AC97 or
#                    intel-hda controller
# $machine_version - machine version; from 4.2 on the devices that produce
#                    sound are bound to the backend via 'audiodev='
#
# Returns an array reference of arguments. Dies for unsupported devices.
sub audio_devs {
    my ($audio, $audiopciaddr, $machine_version) = @_;

    my $id = $audio->{dev_id};

    my $audiodev = min_version($machine_version, 4, 2)
        ? ",audiodev=$audio->{backend_id}"
        : "";

    my @args;

    if ($audio->{dev} eq 'AC97') {
        @args = ('-device', "AC97,id=${id}${audiopciaddr}$audiodev");
    } elsif ($audio->{dev} =~ /intel\-hda$/) {
        # the HDA controller itself, followed by its two codecs
        @args = (
            '-device', "$audio->{dev},id=${id}${audiopciaddr}",
            '-device', "hda-micro,id=${id}-codec0,bus=${id}.0,cad=0$audiodev",
            '-device', "hda-duplex,id=${id}-codec1,bus=${id}.0,cad=1$audiodev",
        );
    } else {
        die "unkown audio device '$audio->{dev}', implement me!";
    }

    push @args, '-audiodev', "$audio->{backend},id=$audio->{backend_id}";

    return \@args;
}
fix #3075: add TPM v1.2 and v2.0 support via swtpm
Starts an instance of swtpm per VM in its systemd scope; it will
terminate by itself if the VM exits, or be terminated manually if
startup fails.
Before first use, a TPM state is created via swtpm_setup. State is
stored in a 'tpmstate0' volume, treated much the same way as an efidisk.
It is migrated 'offline', the important part here is the creation of the
target volume, the actual data transfer happens via the QEMU device
state migration process.
Move-disk can only work offline, as the disk is not registered with
QEMU, so 'drive-mirror' wouldn't work. swtpm itself has no method of
moving a backing storage at runtime.
For backups, a bit of a workaround is necessary (this may later be
replaced by NBD support in swtpm): During the backup, we attach the
backing file of the TPM as a read-only drive to QEMU, so our backup
code can detect it as a block device and back it up as such, while
ensuring consistency with the rest of disk state ("snapshot" semantic).
The name for the ephemeral drive is specifically chosen as
'drive-tpmstate0-backup', diverging from our usual naming scheme with
the '-backup' suffix, to avoid it ever being treated as a regular drive
from the rest of the stack in case it gets left over after a backup for
some reason (shouldn't happen).
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
2021-10-04 17:29:20 +02:00
# Returns a hash reference with the runtime file locations used for the swtpm
# instance of the given VM:
#   socket - path of the swtpm control socket
#   pid    - path of the swtpm PID file
sub get_tpm_paths {
    my ($vmid) = @_;

    my %paths = (
        socket => "/var/run/qemu-server/$vmid.swtpm",
        pid => "/var/run/qemu-server/$vmid.swtpm.pid",
    );

    return \%paths;
}
# Appends the QEMU arguments that attach an emulated TPM to @$devices.
#
# Nothing is added when the VM config has no 'tpmstate0' entry. Otherwise a
# chardev pointing at the swtpm socket of the VM, a TPM emulator backend using
# that chardev and a 'tpm-tis' device are pushed, in that order.
sub add_tpm_device {
    my ($vmid, $devices, $conf) = @_;

    return if !$conf->{tpmstate0};

    my $paths = get_tpm_paths($vmid);

    push @$devices,
        "-chardev", "socket,id=tpmchar,path=$paths->{socket}",
        "-tpmdev", "emulator,id=tpmdev,chardev=tpmchar",
        "-device", "tpm-tis,tpmdev=tpmdev";
}
# Starts the swtpm emulator that backs the TPM of a VM.
#
# Parameters:
#   $storecfg  - storage configuration, used to map the state volume
#   $vmid      - VM ID, selects the socket and PID file (see get_tpm_paths)
#   $tpmdrive  - the 'tpmstate0' property string; nothing is started without it
#   $migration - if true, swtpm_setup is skipped, as the state is received
#                from the migration source
#
# Unless migrating, swtpm_setup is run first to create a new TPM state if none
# exists yet; existing state is left untouched ('--not-overwrite'). swtpm is
# then started as a daemon with '--terminate', so it exits once QEMU
# disconnects.
#
# Returns the untainted PID of the swtpm daemon so it can be killed on error,
# or undef if no PID could be read from the PID file. Dies if the PID file is
# not created within about 5 seconds.
sub start_swtpm {
    my ($storecfg, $vmid, $tpmdrive, $migration) = @_;

    return if !$tpmdrive;

    my $state;
    my $tpm = parse_drive("tpmstate0", $tpmdrive);
    my ($storeid) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
    if ($storeid) {
        $state = PVE::Storage::map_volume($storecfg, $tpm->{file});
    } else {
        $state = $tpm->{file};
    }

    # guard against an unset version so the comparison cannot warn
    my $is_tpm2 = ($tpm->{version} // '') eq 'v2.0';

    my $paths = get_tpm_paths($vmid);

    # during migration, we will get state from remote
    if (!$migration) {
        # run swtpm_setup to create a new TPM state if it doesn't exist yet
        my $setup_cmd = [
            "swtpm_setup",
            "--tpmstate",
            "file://$state",
            "--createek",
            "--create-ek-cert",
            "--create-platform-cert",
            "--lock-nvram",
            "--config",
            "/etc/swtpm_setup.conf", # do not use XDG configs
            "--runas",
            "0", # force creation as root, error if not possible
            "--not-overwrite", # ignore existing state, do not modify
        ];

        push @$setup_cmd, "--tpm2" if $is_tpm2;
        # TPM 2.0 supports ECC crypto, use if possible
        push @$setup_cmd, "--ecc" if $is_tpm2;

        # use the line handed to the callback; $1 is not set by this code and
        # would only hold whatever capture leaked in from the caller
        run_command($setup_cmd, outfunc => sub {
            my ($line) = @_;
            print "swtpm_setup: $line\n";
        });
    }

    my $emulator_cmd = [
        "swtpm",
        "socket",
        "--tpmstate",
        "backend-uri=file://$state,mode=0600",
        "--ctrl",
        "type=unixio,path=$paths->{socket},mode=0600",
        "--pid",
        "file=$paths->{pid}",
        "--terminate", # terminate on QEMU disconnect
        "--daemon",
    ];
    push @$emulator_cmd, "--tpm2" if $is_tpm2;
    run_command($emulator_cmd, outfunc => sub {
        my ($line) = @_;
        print $line;
    });

    my $tries = 100; # swtpm may take a bit to start before daemonizing, wait up to 5s for pid
    while (!-e $paths->{pid}) {
        die "failed to start swtpm: pid file '$paths->{pid}' wasn't created.\n" if --$tries == 0;
        usleep(50_000);
    }

    # return untainted PID of swtpm daemon so it can be killed on error; take
    # the capture from the match itself so a failed match cannot return a
    # stale $1 from an earlier regex
    my $pid_line = file_read_firstline($paths->{pid}) // '';
    my ($pid) = $pid_line =~ m/(\d+)/;
    return $pid;
}
2013-07-24 11:42:48 +02:00
# Tells whether a 'vga' property string selects a SPICE-capable (qxl) display.
#
# Returns 0 if the parsed VGA type is unset or not one of qxl, qxl2, qxl3 or
# qxl4. Otherwise returns the digit following 'qxl', or 1 for plain 'qxl'.
sub vga_conf_has_spice {
    my ($vga) = @_;

    my $vgatype = parse_vga($vga)->{type};

    if ($vgatype && $vgatype =~ m/^qxl([234])?$/) {
        return $1 || 1;
    }

    return 0;
}
2018-11-12 14:10:34 +01:00
# Returns true if $arch is the same architecture as the host reports via
# get_host_arch().
sub is_native($) {
    my $arch = shift;

    my $host_arch = get_host_arch();

    return $host_arch eq $arch;
}
2019-11-25 08:56:58 +01:00
# Returns the architecture of a VM: the 'arch' value of its config if that is
# defined, the host architecture otherwise.
sub get_vm_arch {
    my ($conf) = @_;

    my $configured = $conf->{arch};
    return $configured if defined($configured);

    return get_host_arch();
}
2018-11-12 14:10:34 +01:00
# QEMU machine type to use per guest architecture when none is selected.
my $default_machines = {
    aarch64 => 'virt',
    x86_64 => 'pc',
};
2021-03-12 09:58:12 +01:00
# Returns the leading 'major.minor' part of a QEMU version string.
#
# $kvmversion is optional; when undefined, the version reported by
# kvm_user_version() is used. Returns undef if the version string does not
# start with 'digits.digits'.
sub get_installed_machine_version {
    my ($kvmversion) = @_;

    $kvmversion = kvm_user_version() if !defined($kvmversion);

    # take the capture from the match itself: after a failed match $1 would
    # still hold the capture of some earlier, unrelated regex
    my ($major_minor) = ($kvmversion // '') =~ m/^(\d+\.\d+)/;

    return $major_minor;
}
# Turns an unversioned machine type into a version-pinned one.
#
# The pinned version is $base_version if it is defined and
# PVE::QemuServer::Machine::can_run_pve_machine_version() accepts it for
# $kvmversion; otherwise the installed 'major.minor' QEMU version is used.
#
# An empty machine or 'pc' becomes 'pc-i440fx-<version>', 'q35' becomes
# 'pc-q35-<version>' and 'virt' becomes 'virt-<version>'. Any other value is
# returned unchanged after a warning.
sub windows_get_pinned_machine_version {
    my ($machine, $base_version, $kvmversion) = @_;

    my $base_is_usable = defined($base_version)
        && PVE::QemuServer::Machine::can_run_pve_machine_version($base_version, $kvmversion);

    my $pin_version = $base_version;
    $pin_version = get_installed_machine_version($kvmversion) if !$base_is_usable;

    return "pc-i440fx-$pin_version" if !$machine || $machine eq 'pc';
    return "pc-q35-$pin_version" if $machine eq 'q35';
    return "virt-$pin_version" if $machine eq 'virt';

    warn "unknown machine type '$machine', not touching that!\n";
    return $machine;
}
2019-11-25 08:56:58 +01:00
# Determines the QEMU machine type string to start a VM with.
#
# Parameters:
#   $conf            - VM config; 'machine' and 'ostype' are read
#   $forcemachine    - if set, takes precedence over the configured machine
#   $arch            - guest architecture, 'x86_64' if undefined
#   $add_pve_version - if true, make sure a '+pveN' suffix is present
#   $kvmversion      - QEMU version; queried via kvm_user_version() when it is
#                      needed and undefined
#
# Unversioned machines (unset, 'pc', 'q35' or 'virt') are resolved here:
# Windows guests get pinned to a versioned machine, everything else falls back
# to the per-architecture default if unset, and the pve-version belonging to
# the QEMU version is appended when requested.
#
# Version-pinned machines without a pve-version (e.g. pc-q35-4.1) are assumed
# to be '+pve0' to keep them stable across bumps; a trailing '.pxe' stays at
# the very end of the result.
sub get_vm_machine {
    my ($conf, $forcemachine, $arch, $add_pve_version, $kvmversion) = @_;

    my $machine = $forcemachine || $conf->{machine};

    my $is_unversioned = !$machine || $machine =~ m/^(?:pc|q35|virt)$/;
    if ($is_unversioned) {
        $kvmversion //= kvm_user_version();

        # we must pin Windows VMs without a specific version to 5.1, as 5.2 fixed a bug in ACPI
        # layout which confuses windows quite a bit and may result in various regressions..
        # see: https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg08484.html
        $machine = windows_get_pinned_machine_version($machine, '5.1', $kvmversion)
            if windows_version($conf->{ostype});

        $arch //= 'x86_64';
        $machine = $default_machines->{$arch} if !$machine;

        if ($add_pve_version) {
            my $pvever = PVE::QemuServer::Machine::get_pve_version($kvmversion);
            $machine .= "+pve$pvever";
        }
    }

    return $machine if !$add_pve_version;
    return $machine if $machine =~ m/\+pve\d+?(?:\.pxe)?$/;

    # for version-pinned machines that do not include a pve-version (e.g.
    # pc-q35-4.1), we assume 0 to keep them stable in case we bump
    my ($without_pxe) = $machine =~ m/^(.*?)\.pxe$/;
    my $is_pxe = defined($without_pxe);

    $machine = $without_pxe if $is_pxe;
    $machine .= '+pve0';
    $machine .= '.pxe' if $is_pxe;

    return $machine;
}
2021-10-11 14:10:24 +02:00
# Selects the OVMF firmware image pair for a VM.
#
# Parameters:
#   $arch    - guest architecture, used as key into $OVMF
#   $efidisk - parsed efidisk drive (may be undef); 'efitype' and
#              'pre-enrolled-keys' are read
#   $smm     - for the '4m' efitype, selects the '4m' images if true and the
#              '4m-no-smm' ones otherwise
#
# For efitype '4m' with pre-enrolled keys the '-ms' variant of the image type
# is used; any other efitype uses the 'default' images.
#
# Returns the list ($ovmf_code, $ovmf_vars). Dies if no images are known for
# the architecture or the image type, or if either file does not exist.
sub get_ovmf_files($$$) {
    my ($arch, $efidisk, $smm) = @_;

    my $types = $OVMF->{$arch}
        or die "no OVMF images known for architecture '$arch'\n";

    my $type = 'default';
    if (defined($efidisk->{efitype}) && $efidisk->{efitype} eq '4m') {
        $type = $smm ? "4m" : "4m-no-smm";
        $type .= '-ms' if $efidisk->{'pre-enrolled-keys'};
    }

    # fail with a clear message instead of dereferencing an undefined entry
    my $images = $types->{$type}
        or die "no OVMF images of type '$type' known for architecture '$arch'\n";

    my ($ovmf_code, $ovmf_vars) = $images->@*;

    die "EFI base image '$ovmf_code' not found\n" if !-f $ovmf_code;
    die "EFI vars image '$ovmf_vars' not found\n" if !-f $ovmf_vars;

    return ($ovmf_code, $ovmf_vars);
}
2018-11-12 14:10:37 +01:00
# Emulator binary per guest architecture, used for non-native guests.
my $Arch2Qemu = {
    aarch64 => '/usr/bin/qemu-system-aarch64',
    x86_64 => '/usr/bin/qemu-system-x86_64',
};

# Returns the path of the binary to run guests of the given architecture with:
# '/usr/bin/kvm' if $arch is the host architecture, the matching entry of
# $Arch2Qemu otherwise. Dies if the architecture has no entry.
sub get_command_for_arch($) {
    my ($arch) = @_;

    if (is_native($arch)) {
        return '/usr/bin/kvm';
    }

    my $emulator = $Arch2Qemu->{$arch};
    die "don't know how to emulate architecture '$arch'\n" if !$emulator;

    return $emulator;
}
2019-11-21 15:53:42 +01:00
# To turn the results of query_supported_cpu_flags and
# query_understood_cpu_flags into flags usable on a QEMU command line (-cpu
# element), first array_intersect the result of query_supported_ with
# query_understood_. This is necessary because:
#
# a) query_understood_ returns flags the host cannot use and
# b) query_supported_ (rather the QMP call) doesn't actually return CPU
#    flags, but CPU settings - with most of them being flags. Those settings
#    (and some flags, curiously) cannot be specified as a "-cpu" argument.
#
# query_supported_ needs to start up to 2 temporary VMs and is therefore rather
# expensive. If you need the value returned from this, you can get it much
# cheaper from pmxcfs using PVE::Cluster::get_node_kv('cpuflags-$accel') with
# $accel being 'kvm' or 'tcg'.
#
# pvestatd calls this function on startup and whenever the QEMU/KVM version
# changes, automatically populating pmxcfs.
#
# $arch is optional and defaults to the host architecture; the function dies
# for 'aarch64'.
#
# Returns: { kvm => [ flagX, flagY, ... ], tcg => [ flag1, flag2, ... ] }
# since kvm and tcg machines support different flags
#
sub query_supported_cpu_flags {
    my ($arch) = @_;

    $arch //= get_host_arch();
    my $machine_type = $default_machines->{$arch};

    my $result = {};

    # FIXME: Once this is merged, the code below should work for ARM as well:
    # https://lists.nongnu.org/archive/html/qemu-devel/2019-06/msg04947.html
    die "QEMU/KVM cannot detect CPU flags on ARM (aarch64)\n" if
        $arch eq "aarch64";

    my $have_kvm = defined(kvm_version());
    my $binary = get_command_for_arch($arch);

    my $fakevmid = -1;
    my $pidfile = PVE::QemuServer::Helpers::pidfile_name($fakevmid);

    # Start a temporary (frozen) VM with vmid -1 to allow sending a QMP command
    my $run_query_vm = sub {
        my ($use_kvm) = @_;

        my @cmd = (
            $binary,
            '-machine', $machine_type,
            '-display', 'none',
            '-chardev', "socket,id=qmp,path=/var/run/qemu-server/$fakevmid.qmp,server=on,wait=off",
            '-mon', 'chardev=qmp,mode=control',
            '-pidfile', $pidfile,
            '-S', '-daemonize',
        );
        push @cmd, '-accel', 'tcg' if !$use_kvm;

        my $exit_code = run_command(\@cmd, noerr => 1, quiet => 0);
        die "QEMU flag querying VM exited with code " . $exit_code if $exit_code;

        my %enabled;
        eval {
            my $reply = mon_cmd(
                $fakevmid,
                'query-cpu-model-expansion',
                type => 'full',
                model => { name => 'host' },
            );

            my $props = $reply->{model}->{props};
            for my $name (keys %$props) {
                next if $props->{$name} ne '1';
                # QEMU returns some flags multiple times, with '_', '.' or '-'
                # (e.g. lahf_lm and lahf-lm; sse4.2, sse4-2 and sse4_2; ...).
                # We only keep those with underscores, to match /proc/cpuinfo
                (my $normalized = $name) =~ s/\.|-/_/g;
                $enabled{$normalized} = 1;
            }
        };
        my $query_error = $@;

        # force stop with 10 sec timeout and 'nocheck', always stop, even if QMP failed
        vm_stop(undef, $fakevmid, 1, 1, 10, 0, 1);

        die $query_error if $query_error;

        return [ sort keys %enabled ];
    };

    # We need to query QEMU twice, since KVM and TCG have different supported flags
    PVE::QemuConfig->lock_config($fakevmid, sub {
        $result->{tcg} = eval { $run_query_vm->(0) };
        warn "warning: failed querying supported tcg flags: $@\n" if $@;

        return if !$have_kvm;

        $result->{kvm} = eval { $run_query_vm->(1) };
        warn "warning: failed querying supported kvm flags: $@\n" if $@;
    });

    return $result;
}
# Understood CPU flags are written to a file at 'pve-qemu' compile time
my $understood_cpu_flag_dir = "/usr/share/kvm";

# Reads the list of CPU flags QEMU understands for the host architecture from
# the 'recognized-CPUID-flags-<arch>' file in $understood_cpu_flag_dir.
#
# Returns an array reference of the whitespace-separated flags in that file.
# Dies if the file does not exist.
sub query_understood_cpu_flags {
    my $arch = get_host_arch();
    my $flag_file = "$understood_cpu_flag_dir/recognized-CPUID-flags-$arch";

    if (!-e $flag_file) {
        die "Cannot query understood QEMU CPU flags for architecture: $arch (file not found)\n";
    }

    my $content = file_get_contents($flag_file);
    $content =~ s/^\s+|\s+$//g; # trim, so split yields no empty leading field

    return [ split(/\s+/, $content) ];
}
2021-11-05 14:06:11 +01:00
# Since commit 277d33454f77ec1d1e0bc04e37621e4dd2424b67 in pve-qemu, smm is not off by default
# anymore. But smm=off seems to be required when using SeaBIOS and serial display.
#
# True if the VM uses SeaBIOS (bios unset or 'seabios') and the VGA type is
# 'none' or a serial display (serialN).
my sub should_disable_smm {
    my ($conf, $vga) = @_;

    my $bios = $conf->{bios};
    my $uses_seabios = !defined($bios) || $bios eq 'seabios';

    return $uses_seabios && $vga->{type} && $vga->{type} =~ m/^(serial\d+|none)$/;
}
2022-12-02 13:59:31 +01:00
# Builds the two pflash drive option strings needed to boot a VM with OVMF.
#
# Returns the list ($code_drive, $var_drive_str): the read-only drive holding
# the firmware code image (unit 0) and the drive holding the EFI variables
# (unit 1, id 'drive-efidisk0').
#
# With a configured 'efidisk0' the vars drive points at that volume; dies if
# the disk is not on a storage and has no explicit format. Without one, a
# warning is logged and the vars image is copied to a temporary file under
# /tmp that is used instead.
#
# $version_guard is called with (4, 1, 2) before adding the explicit 'size'
# option, which is only added if it returns true.
my sub print_ovmf_drive_commandlines {
    my ($conf, $storecfg, $vmid, $arch, $q35, $version_guard) = @_;

    my $efidisk = $conf->{efidisk0} ? parse_drive('efidisk0', $conf->{efidisk0}) : undef;

    my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch, $efidisk, $q35);

    my $code_drive = "if=pflash,unit=0,format=raw,readonly=on,file=$ovmf_code";
    my $var_drive_str = "if=pflash,unit=1,id=drive-efidisk0";

    if (!$efidisk) {
        log_warn("no efidisk configured! Using temporary efivars disk.");

        my $path = "/tmp/$vmid-ovmf.fd";
        PVE::Tools::file_copy($ovmf_vars, $path, -s $ovmf_vars);

        $var_drive_str .= ",format=raw,file=$path";
        if ($version_guard->(4, 1, 2)) {
            $var_drive_str .= ",size=" . (-s $ovmf_vars);
        }

        return ($code_drive, $var_drive_str);
    }

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($efidisk->{file}, 1);
    my $path = $efidisk->{file};
    my $format = $efidisk->{format};

    if ($storeid) {
        $path = PVE::Storage::path($storecfg, $efidisk->{file});
        if (!defined($format)) {
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $format = qemu_img_format($scfg, $volname);
        }
    } else {
        die "efidisk format must be specified\n" if !defined($format);
    }

    # SPI flash does lots of read-modify-write OPs, without writeback this gets really slow #3329
    if ($path =~ m/^rbd:/) {
        $var_drive_str .= ',cache=writeback';
        $path .= ':rbd_cache_policy=writeback'; # avoid write-around, we *need* to cache writes too
    }

    $var_drive_str .= ",format=$format,file=$path";

    if ($format eq 'raw' && $version_guard->(4, 1, 2)) {
        $var_drive_str .= ",size=" . (-s $ovmf_vars);
    }
    if (drive_is_read_only($conf, $efidisk)) {
        $var_drive_str .= ',readonly=on';
    }

    return ($code_drive, $var_drive_str);
}
2011-08-23 07:47:04 +02:00
sub config_to_command {
2021-03-03 10:56:08 +01:00
my ( $ storecfg , $ vmid , $ conf , $ defaults , $ forcemachine , $ forcecpu ,
$ pbs_backing ) = @ _ ;
2011-08-23 07:47:04 +02:00
2021-10-20 17:31:45 +02:00
my ( $ globalFlags , $ machineFlags , $ rtcFlags ) = ( [] , [] , [] ) ;
2012-08-20 11:10:24 +02:00
my $ devices = [] ;
my $ bridges = { } ;
2016-02-18 12:54:06 +01:00
my $ ostype = $ conf - > { ostype } ;
2016-11-08 02:56:01 +01:00
my $ winversion = windows_version ( $ ostype ) ;
2018-11-12 14:10:34 +01:00
my $ kvm = $ conf - > { kvm } ;
2019-12-10 11:05:39 +01:00
my $ nodename = nodename ( ) ;
2018-11-12 14:10:34 +01:00
2019-11-25 08:56:58 +01:00
my $ arch = get_vm_arch ( $ conf ) ;
2019-08-13 15:19:07 +02:00
my $ kvm_binary = get_command_for_arch ( $ arch ) ;
my $ kvmver = kvm_user_version ( $ kvm_binary ) ;
2019-11-25 08:56:58 +01:00
2020-02-12 11:10:56 +01:00
if ( ! $ kvmver || $ kvmver !~ m/^(\d+)\.(\d+)/ || $ 1 < 3 ) {
$ kvmver // = "undefined" ;
die "Detected old QEMU binary ('$kvmver', at least 3.0 is required)\n" ;
}
implement PVE Version addition for QEMU machine
With our QEMU 4.1.1 package we can pass an additional internal version
to QEMU's machine, it will be split out there and ignored, but
returned on a QMP 'query-machines' call.
This allows us to use it for increasing the granularity with which we
can roll-out HW layout changes/additions for VMs. Until now we
required a machine version bump, happening normally every major
release of QEMU, with seldom, for us irrelevant, exceptions.
This often delays rolling out a feature, which would break
live-migration, by several months. That can now be avoided, the new
"pve-version" component of the machine can be bumped at will, and
thus we are much more flexible.
That version orders after the ($major, $minor) version components
from a stable release - it can thus also be reset on the next
release.
The implementation extends the qemu-machine REGEX, remembers
"pve-version" when doing a "query-machines" and integrates support
into the min_version and extract_version helpers.
We start out with a version of 1.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
Reviewed-by: Stefan Reiter <s.reiter@proxmox.com>
2019-11-25 11:18:13 +01:00
my $ add_pve_version = min_version ( $ kvmver , 4 , 1 ) ;
my $ machine_type = get_vm_machine ( $ conf , $ forcemachine , $ arch , $ add_pve_version ) ;
2020-09-02 14:07:02 +02:00
my $ machine_version = extract_version ( $ machine_type , $ kvmver ) ;
2018-11-12 14:10:34 +01:00
$ kvm // = 1 if is_native ( $ arch ) ;
2016-11-08 02:56:01 +01:00
2019-12-09 16:14:10 +01:00
$ machine_version =~ m/(\d+)\.(\d+)/ ;
Use 'QEMU version' -> '+pve-version' mapping for machine types
The previously introduced approach can fail for pinned versions when a
new QEMU release is introduced. The saner approach is to use a mapping
that gives one pve-version for each QEMU release.
Fortunately, the old system has not been bumped yet, so we can still
change it without too much effort.
QEMU versions without a mapping are assumed to be pve0; 4.1 is mapped to
pve1 since that's what we had as our default previously.
Pinned machine versions (e.g. pc-i440fx-4.1) are always assumed to be
pve0; for specific pve-versions they'd have to be pinned as well (e.g.
pc-i440fx-4.1+pve1).
The new logic also makes the pve-version dynamic, and starts VMs with
the lowest possible 'feature-level', i.e. if a feature is only available
with 4.1+pve2, but the VM isn't using it, we still start it with
4.1+pve0.
We die if we don't support a version that is requested from us. This
allows us to use the pve-version as live-migration blocks (i.e. bumping
the version and then live-migrating a VM which uses the new feature (so
is running with the bumped version) to an outdated node will present the
user with a helpful error message and fail instead of silently modifying
the config and only failing *after* the migration).
$version_guard is introduced in config_to_command to use for features
that need to check pve-version, it automatically handles selecting the
newest necessary pve-version for the VM.
Tests have to be adjusted, since all of them now resolve to pve0 instead
of pve1. EXPECT_ERROR matching is changed to use 'eq' instead of regex
to allow special characters in error messages.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
2020-02-10 16:05:35 +01:00
my ( $ machine_major , $ machine_minor ) = ( $ 1 , $ 2 ) ;
2020-04-08 17:11:07 +02:00
if ( $ kvmver =~ m/^\d+\.\d+\.(\d+)/ && $ 1 >= 90 ) {
warn "warning: Installed QEMU version ($kvmver) is a release candidate, ignoring version checks\n" ;
} elsif ( ! min_version ( $ kvmver , $ machine_major , $ machine_minor ) ) {
2020-09-02 14:07:02 +02:00
die "Installed QEMU version '$kvmver' is too old to run machine type '$machine_type',"
. " please upgrade node '$nodename'\n"
2020-04-08 17:11:07 +02:00
} elsif ( ! PVE::QemuServer::Machine:: can_run_pve_machine_version ( $ machine_version , $ kvmver ) ) {
Use 'QEMU version' -> '+pve-version' mapping for machine types
The previously introduced approach can fail for pinned versions when a
new QEMU release is introduced. The saner approach is to use a mapping
that gives one pve-version for each QEMU release.
Fortunately, the old system has not been bumped yet, so we can still
change it without too much effort.
QEMU versions without a mapping are assumed to be pve0; 4.1 is mapped to
pve1 since that's what we had as our default previously.
Pinned machine versions (e.g. pc-i440fx-4.1) are always assumed to be
pve0; for specific pve-versions they'd have to be pinned as well (e.g.
pc-i440fx-4.1+pve1).
The new logic also makes the pve-version dynamic, and starts VMs with
the lowest possible 'feature-level', i.e. if a feature is only available
with 4.1+pve2, but the VM isn't using it, we still start it with
4.1+pve0.
We die if we don't support a version that is requested from us. This
allows us to use the pve-version as live-migration blocks (i.e. bumping
the version and then live-migrating a VM which uses the new feature (so
is running with the bumped version) to an outdated node will present the
user with a helpful error message and fail instead of silently modifying
the config and only failing *after* the migration).
$version_guard is introduced in config_to_command to use for features
that need to check pve-version, it automatically handles selecting the
newest necessary pve-version for the VM.
Tests have to be adjusted, since all of them now resolve to pve0 instead
of pve1. EXPECT_ERROR matching is changed to use 'eq' instead of regex
to allow special characters in error messages.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
2020-02-10 16:05:35 +01:00
my $ max_pve_version = PVE::QemuServer::Machine:: get_pve_version ( $ machine_version ) ;
2020-09-02 14:07:02 +02:00
die "Installed qemu-server (max feature level for $machine_major.$machine_minor is"
. " pve$max_pve_version) is too old to run machine type '$machine_type', please upgrade"
. " node '$nodename'\n" ;
Use 'QEMU version' -> '+pve-version' mapping for machine types
The previously introduced approach can fail for pinned versions when a
new QEMU release is introduced. The saner approach is to use a mapping
that gives one pve-version for each QEMU release.
Fortunately, the old system has not been bumped yet, so we can still
change it without too much effort.
QEMU versions without a mapping are assumed to be pve0; 4.1 is mapped to
pve1 since that's what we had as our default previously.
Pinned machine versions (e.g. pc-i440fx-4.1) are always assumed to be
pve0; for specific pve-versions they'd have to be pinned as well (e.g.
pc-i440fx-4.1+pve1).
The new logic also makes the pve-version dynamic, and starts VMs with
the lowest possible 'feature-level', i.e. if a feature is only available
with 4.1+pve2, but the VM isn't using it, we still start it with
4.1+pve0.
We die if we don't support a version that is requested from us. This
allows us to use the pve-version as live-migration blocks (i.e. bumping
the version and then live-migrating a VM which uses the new feature (so
is running with the bumped version) to an outdated node will present the
user with a helpful error message and fail instead of silently modifying
the config and only failing *after* the migration).
$version_guard is introduced in config_to_command to use for features
that need to check pve-version, it automatically handles selecting the
newest necessary pve-version for the VM.
Tests have to be adjusted, since all of them now resolve to pve0 instead
of pve1. EXPECT_ERROR matching is changed to use 'eq' instead of regex
to allow special characters in error messages.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
2020-02-10 16:05:35 +01:00
}
# if a specific +pve version is required for a feature, use $version_guard
# instead of min_version to allow machines to be run with the minimum
# required version
my $ required_pve_version = 0 ;
my $ version_guard = sub {
my ( $ major , $ minor , $ pve ) = @ _ ;
return 0 if ! min_version ( $ machine_version , $ major , $ minor , $ pve ) ;
2020-03-19 11:03:13 +01:00
my $ max_pve = PVE::QemuServer::Machine:: get_pve_version ( "$major.$minor" ) ;
return 1 if min_version ( $ machine_version , $ major , $ minor , $ max_pve + 1 ) ;
Use 'QEMU version' -> '+pve-version' mapping for machine types
The previously introduced approach can fail for pinned versions when a
new QEMU release is introduced. The saner approach is to use a mapping
that gives one pve-version for each QEMU release.
Fortunately, the old system has not been bumped yet, so we can still
change it without too much effort.
QEMU versions without a mapping are assumed to be pve0; 4.1 is mapped to
pve1 since that's what we had as our default previously.
Pinned machine versions (e.g. pc-i440fx-4.1) are always assumed to be
pve0; for specific pve-versions they'd have to be pinned as well (e.g.
pc-i440fx-4.1+pve1).
The new logic also makes the pve-version dynamic, and starts VMs with
the lowest possible 'feature-level', i.e. if a feature is only available
with 4.1+pve2, but the VM isn't using it, we still start it with
4.1+pve0.
We die if we don't support a version that is requested from us. This
allows us to use the pve-version as live-migration blocks (i.e. bumping
the version and then live-migrating a VM which uses the new feature (so
is running with the bumped version) to an outdated node will present the
user with a helpful error message and fail instead of silently modifying
the config and only failing *after* the migration).
$version_guard is introduced in config_to_command to use for features
that need to check pve-version, it automatically handles selecting the
newest necessary pve-version for the VM.
Tests have to be adjusted, since all of them now resolve to pve0 instead
of pve1. EXPECT_ERROR matching is changed to use 'eq' instead of regex
to allow special characters in error messages.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
2020-02-10 16:05:35 +01:00
$ required_pve_version = $ pve if $ pve && $ pve > $ required_pve_version ;
return 1 ;
} ;
2019-12-09 16:14:10 +01:00
2020-09-02 14:07:02 +02:00
if ( $ kvm && ! defined kvm_version ( ) ) {
die "KVM virtualisation configured, but not available. Either disable in VM configuration"
. " or enable in BIOS.\n" ;
2018-11-12 14:10:34 +01:00
}
2017-08-21 11:47:18 +02:00
2019-11-19 12:23:48 +01:00
my $ q35 = PVE::QemuServer::Machine:: machine_type_is_q35 ( $ conf ) ;
memory hotplug patch v10
This patch allows hotplugging memory DIMM modules
through a new option: dimm_memory
The dimm modules are generated from a map
dimmid size dimm_memory
dimm0 512 512 100.00 0
dimm1 512 1024 50.00 1
dimm2 512 1536 33.33 2
dimm3 512 2048 25.00 3
dimm4 512 2560 20.00 0
dimm5 512 3072 16.67 1
dimm6 512 3584 14.29 2
dimm7 512 4096 12.50 3
dimm8 512 4608 11.11 0
dimm9 512 5120 10.00 1
dimm10 512 5632 9.09 2
dimm11 512 6144 8.33 3
dimm12 512 6656 7.69 0
dimm13 512 7168 7.14 1
dimm14 512 7680 6.67 2
dimm15 512 8192 6.25 3
dimm16 512 8704 5.88 0
dimm17 512 9216 5.56 1
dimm18 512 9728 5.26 2
dimm19 512 10240 5.00 3
dimm20 512 10752 4.76 0
...
dimm241 65536 3260416 2.01 1
dimm242 65536 3325952 1.97 2
dimm243 65536 3391488 1.93 3
dimm244 65536 3457024 1.90 0
dimm245 65536 3522560 1.86 1
dimm246 65536 3588096 1.83 2
dimm247 65536 3653632 1.79 3
dimm248 65536 3719168 1.76 0
dimm249 65536 3784704 1.73 1
dimm250 65536 3850240 1.70 2
dimm251 65536 3915776 1.67 3
dimm252 65536 3981312 1.65 0
dimm253 65536 4046848 1.62 1
dimm254 65536 4112384 1.59 2
dimm255 65536 4177920 1.57 3
max dimm_memory size is 4TB, which is the current qemu limit
If the dimm_memory value is not aligned to a memory module, we align dimm_memory to the next module.
vmid.conf
---------
memory: 1024
numa:1
hotplug: memory
When the memory hotplug option is enabled, the minimum memory value must be 1GB, and NUMA also needs to be enabled.
We assign the first 1GB as static memory, split across the NUMA nodes.
The remaining memory is assigned to hotpluggable dimm devices.
The static memory also needs to be 128MB aligned, so that the other dimm devices are aligned too.
This 128MB alignment is a Linux limitation; Windows can align on a 2MB size.
NUMA needs to be aligned, as Linux guests don't boot on some setups with multiple sockets,
and Windows needs NUMA to be able to hotplug memory.
hotplug
----
qm set <vmid> -memory X (where X is bigger than current value)
unplug (not yet implemented in qemu)
------
qm set <vmid> -memory X (where X is lower than current value)
linux guest
-----------
-acpi hotplug module should be loaded in guest
-need a recent kernel. (tested with 3.10)
can be enabled automatically by adding:
/lib/udev/rules.d/80-hotplug-cpu-mem.rules
SUBSYSTEM=="cpu", ACTION=="add", TEST=="online", ATTR{online}=="0", \
ATTR{online}="1"
SUBSYSTEM=="memory", ACTION=="add", TEST=="state", ATTR{state}=="offline", \
ATTR{state}="online"
windows guest
-------------
tested with:
- windows 2012 standard
- windows 2008 enterprise/datacenter
Signed-off-by: Alexandre Derumier <aderumier@odiso.com>
2015-01-28 06:47:24 +01:00
my $ hotplug_features = parse_hotplug_features ( defined ( $ conf - > { hotplug } ) ? $ conf - > { hotplug } : '1' ) ;
2015-11-06 10:27:05 +01:00
my $ use_old_bios_files = undef ;
( $ use_old_bios_files , $ machine_type ) = qemu_use_old_bios_files ( $ machine_type ) ;
2014-06-18 06:54:45 +02:00
2022-11-15 08:34:47 +01:00
my $ cmd = [] ;
2022-06-30 17:09:45 -07:00
if ( $ conf - > { affinity } ) {
2022-11-15 08:34:47 +01:00
push @$ cmd , '/usr/bin/taskset' , '--cpu-list' , '--all-tasks' , $ conf - > { affinity } ;
2022-06-30 17:09:45 -07:00
}
2019-08-13 15:19:07 +02:00
push @$ cmd , $ kvm_binary ;
2011-08-23 07:47:04 +02:00
push @$ cmd , '-id' , $ vmid ;
2018-03-12 13:28:14 +01:00
my $ vmname = $ conf - > { name } || "vm$vmid" ;
2022-06-17 14:25:49 +02:00
push @$ cmd , '-name' , "$vmname,debug-threads=on" ;
2018-03-12 13:28:14 +01:00
2020-10-19 14:18:40 +02:00
push @$ cmd , '-no-shutdown' ;
2011-08-23 07:47:04 +02:00
my $ use_virtio = 0 ;
2019-11-19 12:23:44 +01:00
my $ qmpsocket = PVE::QemuServer::Helpers:: qmp_socket ( $ vmid ) ;
2021-05-27 12:27:50 +02:00
push @$ cmd , '-chardev' , "socket,id=qmp,path=$qmpsocket,server=on,wait=off" ;
2012-05-29 14:01:50 +02:00
push @$ cmd , '-mon' , "chardev=qmp,mode=control" ;
2019-11-19 12:23:49 +01:00
if ( min_version ( $ machine_version , 2 , 12 ) ) {
2018-11-14 14:59:58 +01:00
push @$ cmd , '-chardev' , "socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5" ;
2018-10-17 14:31:19 +02:00
push @$ cmd , '-mon' , "chardev=qmp-event,mode=control" ;
}
2011-08-23 07:47:04 +02:00
2019-11-19 12:23:44 +01:00
push @$ cmd , '-pidfile' , PVE::QemuServer::Helpers:: pidfile_name ( $ vmid ) ;
2011-09-12 12:26:00 +02:00
2011-08-23 07:47:04 +02:00
push @$ cmd , '-daemonize' ;
2014-06-26 11:12:25 +02:00
if ( $ conf - > { smbios1 } ) {
2019-06-11 12:13:52 +02:00
my $ smbios_conf = parse_smbios1 ( $ conf - > { smbios1 } ) ;
if ( $ smbios_conf - > { base64 } ) {
# Do not pass base64 flag to qemu
delete $ smbios_conf - > { base64 } ;
my $ smbios_string = "" ;
foreach my $ key ( keys %$ smbios_conf ) {
my $ value ;
if ( $ key eq "uuid" ) {
$ value = $ smbios_conf - > { uuid }
} else {
$ value = decode_base64 ( $ smbios_conf - > { $ key } ) ;
}
# qemu accepts any binary data, only commas need escaping by double comma
$ value =~ s/,/,,/g ;
$ smbios_string . = "," . $ key . "=" . $ value if $ value ;
}
push @$ cmd , '-smbios' , "type=1" . $ smbios_string ;
} else {
push @$ cmd , '-smbios' , "type=1,$conf->{smbios1}" ;
}
2014-06-26 11:12:25 +02:00
}
2015-12-10 10:48:04 +01:00
if ( $ conf - > { bios } && $ conf - > { bios } eq 'ovmf' ) {
2022-12-02 13:59:31 +01:00
my ( $ code_drive_str , $ var_drive_str ) =
print_ovmf_drive_commandlines ( $ conf , $ storecfg , $ vmid , $ arch , $ q35 , $ version_guard ) ;
push $ cmd - > @ * , '-drive' , $ code_drive_str ;
push $ cmd - > @ * , '-drive' , $ var_drive_str ;
2015-11-21 08:48:59 +01:00
}
2021-10-20 12:56:57 +02:00
if ( $ q35 ) { # tell QEMU to load q35 config early
2019-07-08 11:25:10 +02:00
# we use different pcie-port hardware for qemu >= 4.0 for passthrough
2019-11-19 12:23:49 +01:00
if ( min_version ( $ machine_version , 4 , 0 ) ) {
2019-07-08 11:25:10 +02:00
push @$ devices , '-readconfig' , '/usr/share/qemu-server/pve-q35-4.0.cfg' ;
} else {
push @$ devices , '-readconfig' , '/usr/share/qemu-server/pve-q35.cfg' ;
}
}
2016-02-10 12:52:12 +01:00
2021-10-21 09:51:22 +02:00
if ( defined ( my $ fixups = qemu_created_version_fixups ( $ conf , $ forcemachine , $ kvmver ) ) ) {
push @$ cmd , $ fixups - > @ * ;
}
2020-01-31 15:41:21 +01:00
if ( $ conf - > { vmgenid } ) {
push @$ devices , '-device' , 'vmgenid,guid=' . $ conf - > { vmgenid } ;
}
2016-06-14 10:50:37 +02:00
# add usb controllers
2020-09-02 14:07:02 +02:00
my @ usbcontrollers = PVE::QemuServer::USB:: get_usb_controllers (
2022-11-10 15:35:56 +01:00
$ conf , $ bridges , $ arch , $ machine_type , $ usbdesc - > { format } , $ MAX_USB_DEVICES , $ machine_version ) ;
2016-06-14 10:50:37 +02:00
push @$ devices , @ usbcontrollers if @ usbcontrollers ;
2018-11-09 13:31:09 +01:00
my $ vga = parse_vga ( $ conf - > { vga } ) ;
add multi-monitors spice support
add qxl2 (2 monitors), qxl3 (3 monitors), qxl4 (4 monitors) vga types.
For Linux, we only need 1 qxl card with more memory.
For Windows, we need 1 qxl card per monitor.
Original Information from spice-mailing
"
You need to specify multiple devices for Windows VMs. This is what
libvirt gives me (via 'virsh domxml-to-native qemu argv DOMAIN_XML'):
<...> -vga qxl -global qxl-vga.ram_size=67108864 -global qxl-vga.vram_size=33554432 -device qxl,id=video1,ram_size=67108864,vram_size=33554432 -device qxl,id=video2,ram_size=67108864,vram_size=33554432 -device qxl,id=video3,ram_size=67108864,vram_size=33554432
For Linux VM, just one qxl device is OK but then it's advisable to
increase the available RAM:
<...> -vga qxl -global qxl-vga.ram_size=134217728 -global qxl-vga.vram_size=33554432
If you don't turn off surfaces, then you should increase vram size to
say 64 MB from current default of 32 MB.
"
Signed-off-by: Alexandre Derumier <aderumier@odiso.com>
2013-09-25 15:33:08 +02:00
2018-11-09 13:31:09 +01:00
my $ qxlnum = vga_conf_has_spice ( $ conf - > { vga } ) ;
$ vga - > { type } = 'qxl' if $ qxlnum ;
add multi-monitors spice support
add qxl2 (2 monitors), qxl3 (3 monitors), qxl4 (4 monitors) vga types.
For Linux, we only need 1 qxl card with more memory.
For Windows, we need 1 qxl card per monitor.
Original Information from spice-mailing
"
You need to specify multiple devices for Windows VMs. This is what
libvirt gives me (via 'virsh domxml-to-native qemu argv DOMAIN_XML'):
<...> -vga qxl -global qxl-vga.ram_size=67108864 -global qxl-vga.vram_size=33554432 -device qxl,id=video1,ram_size=67108864,vram_size=33554432 -device qxl,id=video2,ram_size=67108864,vram_size=33554432 -device qxl,id=video3,ram_size=67108864,vram_size=33554432
For Linux VM, just one qxl device is OK but then it's advisable to
increase the available RAM:
<...> -vga qxl -global qxl-vga.ram_size=134217728 -global qxl-vga.vram_size=33554432
If you don't turn off surfaces, then you should increase vram size to
say 64 MB from current default of 32 MB.
"
Signed-off-by: Alexandre Derumier <aderumier@odiso.com>
2013-09-25 15:33:08 +02:00
2018-11-09 13:31:09 +01:00
if ( ! $ vga - > { type } ) {
2018-11-12 14:10:44 +01:00
if ( $ arch eq 'aarch64' ) {
$ vga - > { type } = 'virtio' ;
2019-11-19 12:23:49 +01:00
} elsif ( min_version ( $ machine_version , 2 , 9 ) ) {
2018-11-09 13:31:09 +01:00
$ vga - > { type } = ( ! $ winversion || $ winversion >= 6 ) ? 'std' : 'cirrus' ;
2017-07-12 07:34:51 +02:00
} else {
2018-11-09 13:31:09 +01:00
$ vga - > { type } = ( $ winversion >= 6 ) ? 'std' : 'cirrus' ;
2017-07-12 07:34:51 +02:00
}
2013-07-19 09:53:44 +02:00
}
2011-08-23 07:47:04 +02:00
# enable absolute mouse coordinates (needed by vnc)
2021-10-20 15:39:20 +02:00
my $ tablet = $ conf - > { tablet } ;
if ( ! defined ( $ tablet ) ) {
2013-07-19 09:53:44 +02:00
$ tablet = $ defaults - > { tablet } ;
2013-10-02 09:11:57 +02:00
$ tablet = 0 if $ qxlnum ; # disable for spice because it is not needed
2018-11-09 13:31:09 +01:00
$ tablet = 0 if $ vga - > { type } =~ m/^serial\d+$/ ; # disable if we use serial terminal (no vga card)
2013-07-19 09:53:44 +02:00
}
2018-11-12 14:10:42 +01:00
if ( $ tablet ) {
push @$ devices , '-device' , print_tabletdevice_full ( $ conf , $ arch ) if $ tablet ;
my $ kbd = print_keyboarddevice_full ( $ conf , $ arch ) ;
push @$ devices , '-device' , $ kbd if defined ( $ kbd ) ;
}
2014-11-10 06:31:08 +01:00
2020-10-16 16:52:11 +02:00
my $ bootorder = device_bootorder ( $ conf ) ;
2020-10-06 15:32:15 +02:00
2020-06-18 16:36:53 +02:00
# host pci device passthrough
2020-06-18 16:36:54 +02:00
my ( $ kvm_off , $ gpu_passthrough , $ legacy_igd ) = PVE::QemuServer::PCI:: print_hostpci_devices (
2020-10-16 17:53:28 +02:00
$ vmid , $ conf , $ devices , $ vga , $ winversion , $ q35 , $ bridges , $ arch , $ machine_type , $ bootorder ) ;
2011-08-23 07:47:04 +02:00
# usb devices
2019-09-11 14:43:32 +02:00
my $ usb_dev_features = { } ;
2019-11-19 12:23:49 +01:00
$ usb_dev_features - > { spice_usb3 } = 1 if min_version ( $ machine_version , 4 , 0 ) ;
2019-09-11 14:43:32 +02:00
2020-09-02 14:07:02 +02:00
my @ usbdevices = PVE::QemuServer::USB:: get_usb_devices (
2022-11-10 15:35:56 +01:00
$ conf , $ usbdesc - > { format } , $ MAX_USB_DEVICES , $ usb_dev_features , $ bootorder , $ machine_version ) ;
2016-06-14 10:50:37 +02:00
push @$ devices , @ usbdevices if @ usbdevices ;
2020-10-06 15:32:15 +02:00
2011-08-23 07:47:04 +02:00
# serial devices
2011-09-11 09:00:00 +02:00
for ( my $ i = 0 ; $ i < $ MAX_SERIAL_PORTS ; $ i + + ) {
2021-10-20 15:39:20 +02:00
my $ path = $ conf - > { "serial$i" } or next ;
if ( $ path eq 'socket' ) {
my $ socket = "/var/run/qemu-server/${vmid}.serial$i" ;
push @$ devices , '-chardev' , "socket,id=serial$i,path=$socket,server=on,wait=off" ;
2022-12-20 10:23:32 +01:00
# On aarch64, serial0 is the UART device. QEMU only allows
2021-10-20 15:39:20 +02:00
# connecting UART devices via the '-serial' command line, as
# the device has a fixed slot on the hardware...
if ( $ arch eq 'aarch64' && $ i == 0 ) {
push @$ devices , '-serial' , "chardev:serial$i" ;
2013-07-31 06:58:26 +02:00
} else {
push @$ devices , '-device' , "isa-serial,chardev=serial$i" ;
}
2021-10-20 15:39:20 +02:00
} else {
die "no such serial device\n" if ! - c $ path ;
push @$ devices , '-chardev' , "tty,id=serial$i,path=$path" ;
push @$ devices , '-device' , "isa-serial,chardev=serial$i" ;
2011-09-12 08:59:05 +02:00
}
2011-08-23 07:47:04 +02:00
}
# parallel devices
2011-09-11 09:00:01 +02:00
for ( my $ i = 0 ; $ i < $ MAX_PARALLEL_PORTS ; $ i + + ) {
2011-09-12 08:59:05 +02:00
if ( my $ path = $ conf - > { "parallel$i" } ) {
2011-09-12 12:26:00 +02:00
die "no such parallel device\n" if ! - c $ path ;
2013-08-14 15:55:01 +02:00
my $ devtype = $ path =~ m !^/dev/usb/lp! ? 'tty' : 'parport' ;
2013-08-14 14:22:24 +02:00
push @$ devices , '-chardev' , "$devtype,id=parallel$i,path=$path" ;
2012-08-20 11:10:24 +02:00
push @$ devices , '-device' , "isa-parallel,chardev=parallel$i" ;
2011-09-12 08:59:05 +02:00
}
2011-08-23 07:47:04 +02:00
}
2020-04-17 15:20:05 +02:00
if ( min_version ( $ machine_version , 4 , 0 ) && ( my $ audio = conf_has_audio ( $ conf ) ) ) {
2019-07-17 15:58:57 +02:00
my $ audiopciaddr = print_pci_addr ( "audio0" , $ bridges , $ arch , $ machine_type ) ;
2020-05-06 14:51:54 +02:00
my $ audio_devs = audio_devs ( $ audio , $ audiopciaddr , $ machine_version ) ;
2020-04-17 15:20:05 +02:00
push @$ devices , @$ audio_devs ;
2019-07-17 15:58:57 +02:00
}
2011-09-12 12:26:00 +02:00
fix #3075: add TPM v1.2 and v2.0 support via swtpm
Starts an instance of swtpm per VM in its systemd scope; it will
terminate by itself if the VM exits, or be terminated manually if
startup fails.
Before first use, a TPM state is created via swtpm_setup. State is
stored in a 'tpmstate0' volume, treated much the same way as an efidisk.
It is migrated 'offline', the important part here is the creation of the
target volume, the actual data transfer happens via the QEMU device
state migration process.
Move-disk can only work offline, as the disk is not registered with
QEMU, so 'drive-mirror' wouldn't work. swtpm itself has no method of
moving a backing storage at runtime.
For backups, a bit of a workaround is necessary (this may later be
replaced by NBD support in swtpm): During the backup, we attach the
backing file of the TPM as a read-only drive to QEMU, so our backup
code can detect it as a block device and back it up as such, while
ensuring consistency with the rest of disk state ("snapshot" semantic).
The name for the ephemeral drive is specifically chosen as
'drive-tpmstate0-backup', diverging from our usual naming scheme with
the '-backup' suffix, to avoid it ever being treated as a regular drive
from the rest of the stack in case it gets left over after a backup for
some reason (shouldn't happen).
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
2021-10-04 17:29:20 +02:00
add_tpm_device ( $ vmid , $ devices , $ conf ) ;
2011-08-23 07:47:04 +02:00
my $ sockets = 1 ;
$ sockets = $ conf - > { smp } if $ conf - > { smp } ; # old style - no longer used
$ sockets = $ conf - > { sockets } if $ conf - > { sockets } ;
my $ cores = $ conf - > { cores } || 1 ;
2014-01-07 13:32:50 +01:00
2015-01-09 16:30:35 +01:00
my $ maxcpus = $ sockets * $ cores ;
2014-11-17 09:52:30 +01:00
2015-01-09 16:30:35 +01:00
my $ vcpus = $ conf - > { vcpus } ? $ conf - > { vcpus } : $ maxcpus ;
2014-11-17 09:52:30 +01:00
2015-01-09 16:30:35 +01:00
my $ allowed_vcpus = $ cpuinfo - > { cpus } ;
2021-10-20 12:56:57 +02:00
die "MAX $allowed_vcpus vcpus allowed per VM on this node\n" if ( $ allowed_vcpus < $ maxcpus ) ;
2011-08-23 07:47:04 +02:00
2021-10-20 12:56:57 +02:00
if ( $ hotplug_features - > { cpu } && min_version ( $ machine_version , 2 , 7 ) ) {
2016-10-17 12:18:55 +02:00
push @$ cmd , '-smp' , "1,sockets=$sockets,cores=$cores,maxcpus=$maxcpus" ;
for ( my $ i = 2 ; $ i <= $ vcpus ; $ i + + ) {
my $ cpustr = print_cpu_device ( $ conf , $ i ) ;
push @$ cmd , '-device' , $ cpustr ;
}
} else {
push @$ cmd , '-smp' , "$vcpus,sockets=$sockets,cores=$cores,maxcpus=$maxcpus" ;
}
2011-08-23 07:47:04 +02:00
push @$ cmd , '-nodefaults' ;
2016-09-08 09:25:57 +02:00
push @$ cmd , '-boot' , "menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg" ;
2011-08-23 07:47:04 +02:00
2011-09-15 09:11:27 +02:00
push @$ cmd , '-no-acpi' if defined ( $ conf - > { acpi } ) && $ conf - > { acpi } == 0 ;
2011-08-23 07:47:04 +02:00
2011-09-15 09:11:27 +02:00
push @$ cmd , '-no-reboot' if defined ( $ conf - > { reboot } ) && $ conf - > { reboot } == 0 ;
2011-08-23 07:47:04 +02:00
2018-11-12 08:40:20 +01:00
if ( $ vga - > { type } && $ vga - > { type } !~ m/^serial\d+$/ && $ vga - > { type } ne 'none' ) {
2020-09-02 14:07:02 +02:00
push @$ devices , '-device' , print_vga_device (
$ conf , $ vga , $ arch , $ machine_version , $ machine_type , undef , $ qxlnum , $ bridges ) ;
2022-04-22 14:28:09 +02:00
push @$ cmd , '-display' , 'egl-headless,gl=core' if $ vga - > { type } eq 'virtio-gl' ; # VIRGL
2019-11-19 12:23:44 +01:00
my $ socket = PVE::QemuServer::Helpers:: vnc_socket ( $ vmid ) ;
2021-05-27 12:27:50 +02:00
push @$ cmd , '-vnc' , "unix:$socket,password=on" ;
2016-02-18 08:14:43 +01:00
} else {
2018-11-09 13:31:09 +01:00
push @$ cmd , '-vga' , 'none' if $ vga - > { type } eq 'none' ;
2016-02-18 08:14:43 +01:00
push @$ cmd , '-nographic' ;
}
2011-08-23 07:47:04 +02:00
# time drift fix
2011-09-15 09:11:27 +02:00
my $ tdf = defined ( $ conf - > { tdf } ) ? $ conf - > { tdf } : $ defaults - > { tdf } ;
2012-09-26 12:42:03 +02:00
my $ useLocaltime = $ conf - > { localtime } ;
2011-08-23 07:47:04 +02:00
2016-11-08 02:56:01 +01:00
if ( $ winversion >= 5 ) { # windows
$ useLocaltime = 1 if ! defined ( $ conf - > { localtime } ) ;
2016-05-20 10:26:08 +02:00
2016-11-08 02:56:01 +01:00
# use time drift fix when acpi is enabled
if ( ! ( defined ( $ conf - > { acpi } ) && $ conf - > { acpi } == 0 ) ) {
$ tdf = 1 if ! defined ( $ conf - > { tdf } ) ;
2013-07-15 08:51:37 +02:00
}
2016-11-08 02:56:01 +01:00
}
2013-07-15 08:51:37 +02:00
2016-11-08 02:56:01 +01:00
if ( $ winversion >= 6 ) {
push @$ globalFlags , 'kvm-pit.lost_tick_policy=discard' ;
push @$ cmd , '-no-hpet' ;
2011-08-23 07:47:04 +02:00
}
2012-09-26 12:42:03 +02:00
push @$ rtcFlags , 'driftfix=slew' if $ tdf ;
2020-04-08 17:44:16 +02:00
if ( $ conf - > { startdate } && $ conf - > { startdate } ne 'now' ) {
2012-09-26 12:42:03 +02:00
push @$ rtcFlags , "base=$conf->{startdate}" ;
} elsif ( $ useLocaltime ) {
push @$ rtcFlags , 'base=localtime' ;
}
2011-08-23 07:47:04 +02:00
2020-04-07 15:56:15 +02:00
if ( $ forcecpu ) {
push @$ cmd , '-cpu' , $ forcecpu ;
} else {
2020-04-08 17:44:16 +02:00
push @$ cmd , get_cpu_options ( $ conf , $ arch , $ kvm , $ kvm_off , $ machine_version , $ winversion , $ gpu_passthrough ) ;
2020-04-07 15:56:15 +02:00
}
2013-07-15 08:51:35 +02:00
2023-02-13 13:00:09 +01:00
PVE::QemuServer::Memory:: config (
$ conf , $ vmid , $ sockets , $ cores , $ defaults , $ hotplug_features - > { memory } , $ cmd ) ;
2019-05-03 14:22:38 +02:00
2011-08-23 07:47:04 +02:00
push @$ cmd , '-S' if $ conf - > { freeze } ;
2018-03-20 14:26:43 +01:00
push @$ cmd , '-k' , $ conf - > { keyboard } if defined ( $ conf - > { keyboard } ) ;
2011-08-23 07:47:04 +02:00
2019-11-18 17:46:12 +11:00
my $ guest_agent = parse_guest_agent ( $ conf ) ;
if ( $ guest_agent - > { enabled } ) {
2019-11-19 12:23:44 +01:00
my $ qgasocket = PVE::QemuServer::Helpers:: qmp_socket ( $ vmid , 1 ) ;
2021-05-27 12:27:50 +02:00
push @$ devices , '-chardev' , "socket,path=$qgasocket,server=on,wait=off,id=qga0" ;
2019-11-18 17:46:12 +11:00
2019-11-20 13:24:57 +01:00
if ( ! $ guest_agent - > { type } || $ guest_agent - > { type } eq 'virtio' ) {
2019-11-18 17:46:12 +11:00
my $ pciaddr = print_pci_addr ( "qga0" , $ bridges , $ arch , $ machine_type ) ;
push @$ devices , '-device' , "virtio-serial,id=qga0$pciaddr" ;
push @$ devices , '-device' , 'virtserialport,chardev=qga0,name=org.qemu.guest_agent.0' ;
} elsif ( $ guest_agent - > { type } eq 'isa' ) {
push @$ devices , '-device' , "isa-serial,chardev=qga0" ;
}
2012-09-03 09:51:08 +02:00
}
2020-10-16 16:52:11 +02:00
my $ rng = $ conf - > { rng0 } ? parse_rng ( $ conf - > { rng0 } ) : undef ;
if ( $ rng && $ version_guard - > ( 4 , 1 , 2 ) ) {
2020-05-05 16:53:12 +02:00
check_rng_source ( $ rng - > { source } ) ;
fix #2264: add virtio-rng device
Allow a user to add a virtio-rng-pci (an emulated hardware random
number generator) to a VM with the rng0 setting. The setting is
version_guard()-ed.
Limit the selection of entropy source to one of three:
/dev/urandom (preferred): Non-blocking kernel entropy source
/dev/random: Blocking kernel source
/dev/hwrng: Hardware RNG on the host for passthrough
QEMU itself defaults to /dev/urandom (or the equivalent getrandom()
call) if no source file is given, but I don't fully trust that
behaviour to stay constant, considering the documentation [0] already
disagrees with the code [1], so let's always specify the file ourselves.
/dev/urandom is preferred, since it prevents host entropy starvation.
The quality of randomness is still good enough to emulate a hwrng, since
a) it's still seeded from the kernel's true entropy pool periodically
and b) it's mixed with true entropy in the guest as well.
Additionally, all sources about entropy prediction attacks I could find
mention that to predict /dev/urandom results, /dev/random has to be
accessed or manipulated in one way or the other - this is not possible
from a VM however, as the entropy we're talking about comes from the
*host's* blocking pool.
More about the entropy and security implications of the non-blocking
interface in [2] and [3].
Note further that only one /dev/hwrng exists at any given time, if
multiple RNGs are available, only the one selected in
'/sys/devices/virtual/misc/hw_random/rng_current' will feed the file.
Selecting this is left as an exercise to the user, if at all required.
We limit the available entropy to 1 KiB/s by default, but allow the user
to override this. Interesting to note is that the limiter does not work
linearly, i.e. max_bytes=1024/period=1000 means that up to 1 KiB of data
becomes available on a 1000 millisecond timer, not that 1 KiB is
streamed to the guest over the course of one second - hence the
configurable period.
The default used here is the same as given in the QEMU documentation [0]
and has been verified to affect entropy availability in a guest by
measuring /dev/random throughput. 1 KiB/s is enough to avoid any
early-boot entropy shortages, and already has a significant impact on
/dev/random availability in the guest.
[0] https://wiki.qemu.org/Features/VirtIORNG
[1] https://git.qemu.org/?p=qemu.git;a=blob;f=crypto/random-platform.c;h=f92f96987d7d262047c7604b169a7fdf11236107;hb=HEAD
[2] https://lwn.net/Articles/261804/
[3] https://lwn.net/Articles/808575/
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
2020-02-20 18:10:44 +01:00
my $ max_bytes = $ rng - > { max_bytes } // $ rng_fmt - > { max_bytes } - > { default } ;
my $ period = $ rng - > { period } // $ rng_fmt - > { period } - > { default } ;
my $ limiter_str = "" ;
if ( $ max_bytes ) {
$ limiter_str = ",max-bytes=$max_bytes,period=$period" ;
}
my $ rng_addr = print_pci_addr ( "rng0" , $ bridges , $ arch , $ machine_type ) ;
push @$ devices , '-object' , "rng-random,filename=$rng->{source},id=rng0" ;
push @$ devices , '-device' , "virtio-rng-pci,rng=rng0$limiter_str$rng_addr" ;
}
2013-07-24 11:24:20 +02:00
my $ spice_port ;
add multi-monitors spice support
add qxl2 (2monitors),qxl3 (3monitors),qxl4 (4monitors) vga type.
For linux, we only need 1 qxl card with more memory
For windows, we need 1 qxl card by monitor
Original Information from spice-mailing
"
You need to specify multiple devices for Windows VMs. This is what
libvirt gives me (via 'virsh domxml-to-native qemu argv DOMAIN_XML'):
<...> -vga qxl -global qxl-vga.ram_size=67108864 -global qxl-vga.vram_size=33554432 -device qxl,id=video1,ram_size=67108864,vram_size=33554432 -device qxl,id=video2,ram_size=67108864,vram_size=33554432 -device qxl,id=video3,ram_size=67108864,vram_size=33554432
For Linux VM, just one qxl device is OK but then it's advisable to
increase the available RAM:
<...> -vga qxl -global qxl-vga.ram_size=134217728 -global qxl-vga.vram_size=33554432
If you don't turn off surfaces, then you should increase vram size to
say 64 MB from current default of 32 MB.
"
Signed-off-by: Alexandre Derumier <aderumier@odiso.com>
2013-09-25 15:33:08 +02:00
2022-04-28 17:46:57 +02:00
if ( $ qxlnum || $ vga - > { type } =~ /^virtio/ ) {
2013-10-02 09:11:57 +02:00
if ( $ qxlnum > 1 ) {
2016-11-11 09:32:11 +01:00
if ( $ winversion ) {
2020-04-08 17:44:16 +02:00
for ( my $ i = 1 ; $ i < $ qxlnum ; $ i + + ) {
2020-09-02 14:07:02 +02:00
push @$ devices , '-device' , print_vga_device (
$ conf , $ vga , $ arch , $ machine_version , $ machine_type , $ i , $ qxlnum , $ bridges ) ;
2013-10-02 09:11:57 +02:00
}
} else {
# assume other OS works like Linux
2018-11-09 13:31:09 +01:00
my ( $ ram , $ vram ) = ( "134217728" , "67108864" ) ;
if ( $ vga - > { memory } ) {
$ ram = PVE::Tools:: convert_size ( $ qxlnum * 4 * $ vga - > { memory } , 'mb' = > 'b' ) ;
$ vram = PVE::Tools:: convert_size ( $ qxlnum * 2 * $ vga - > { memory } , 'mb' = > 'b' ) ;
}
push @$ cmd , '-global' , "qxl-vga.ram_size=$ram" ;
push @$ cmd , '-global' , "qxl-vga.vram_size=$vram" ;
add multi-monitors spice support
add qxl2 (2monitors),qxl3 (3monitors),qxl4 (4monitors) vga type.
For linux, we only need 1 qxl card with more memory
For windows, we need 1 qxl card by monitor
Original Information from spice-mailing
"
You need to specify multiple devices for Windows VMs. This is what
libvirt gives me (via 'virsh domxml-to-native qemu argv DOMAIN_XML'):
<...> -vga qxl -global qxl-vga.ram_size=67108864 -global qxl-vga.vram_size=33554432 -device qxl,id=video1,ram_size=67108864,vram_size=33554432 -device qxl,id=video2,ram_size=67108864,vram_size=33554432 -device qxl,id=video3,ram_size=67108864,vram_size=33554432
For Linux VM, just one qxl device is OK but then it's advisable to
increase the available RAM:
<...> -vga qxl -global qxl-vga.ram_size=134217728 -global qxl-vga.vram_size=33554432
If you don't turn off surfaces, then you should increase vram size to
say 64 MB from current default of 32 MB.
"
Signed-off-by: Alexandre Derumier <aderumier@odiso.com>
2013-09-25 15:33:08 +02:00
}
}
2018-11-12 14:10:42 +01:00
my $ pciaddr = print_pci_addr ( "spice" , $ bridges , $ arch , $ machine_type ) ;
2013-07-24 09:52:33 +02:00
2015-05-12 12:14:03 +02:00
my $ pfamily = PVE::Tools:: get_host_address_family ( $ nodename ) ;
2017-05-30 15:30:14 +02:00
my @ nodeaddrs = PVE::Tools:: getaddrinfo_all ( 'localhost' , family = > $ pfamily ) ;
die "failed to get an ip address of type $pfamily for 'localhost'\n" if ! @ nodeaddrs ;
2019-10-08 17:56:15 +02:00
push @$ devices , '-device' , "virtio-serial,id=spice$pciaddr" ;
push @$ devices , '-chardev' , "spicevmc,id=vdagent,name=vdagent" ;
push @$ devices , '-device' , "virtserialport,chardev=vdagent,name=com.redhat.spice.0" ;
2017-05-30 15:30:14 +02:00
my $ localhost = PVE::Network:: addr_to_ip ( $ nodeaddrs [ 0 ] - > { addr } ) ;
$ spice_port = PVE::Tools:: next_spice_port ( $ pfamily , $ localhost ) ;
2013-07-17 11:33:02 +02:00
2020-09-02 14:07:02 +02:00
my $ spice_enhancement_str = $ conf - > { spice_enhancements } // '' ;
my $ spice_enhancement = parse_property_string ( $ spice_enhancements_fmt , $ spice_enhancement_str ) ;
2019-09-04 13:26:11 +02:00
if ( $ spice_enhancement - > { foldersharing } ) {
push @$ devices , '-chardev' , "spiceport,id=foldershare,name=org.spice-space.webdav.0" ;
push @$ devices , '-device' , "virtserialport,chardev=foldershare,name=org.spice-space.webdav.0" ;
}
2019-08-22 17:33:18 +02:00
2019-09-04 13:26:11 +02:00
my $ spice_opts = "tls-port=${spice_port},addr=$localhost,tls-ciphers=HIGH,seamless-migration=on" ;
2020-09-02 14:07:02 +02:00
$ spice_opts . = ",streaming-video=$spice_enhancement->{videostreaming}"
if $ spice_enhancement - > { videostreaming } ;
2019-09-04 13:26:11 +02:00
push @$ devices , '-spice' , "$spice_opts" ;
2013-06-25 07:10:42 +02:00
}
2012-12-19 07:30:34 +01:00
# enable balloon by default, unless explicitly disabled
if ( ! defined ( $ conf - > { balloon } ) || $ conf - > { balloon } ) {
2021-10-20 17:31:45 +02:00
my $ pciaddr = print_pci_addr ( "balloon0" , $ bridges , $ arch , $ machine_type ) ;
2022-03-06 13:46:46 +01:00
my $ ballooncmd = "virtio-balloon-pci,id=balloon0$pciaddr" ;
$ ballooncmd . = ",free-page-reporting=on" if min_version ( $ machine_version , 6 , 2 ) ;
push @$ devices , '-device' , $ ballooncmd ;
2012-12-19 07:30:34 +01:00
}
2011-08-23 07:47:04 +02:00
2011-09-08 11:39:56 +02:00
if ( $ conf - > { watchdog } ) {
my $ wdopts = parse_watchdog ( $ conf - > { watchdog } ) ;
2021-10-20 17:31:45 +02:00
my $ pciaddr = print_pci_addr ( "watchdog" , $ bridges , $ arch , $ machine_type ) ;
2011-10-03 14:53:10 +02:00
my $ watchdog = $ wdopts - > { model } || 'i6300esb' ;
2012-08-20 11:10:24 +02:00
push @$ devices , '-device' , "$watchdog$pciaddr" ;
push @$ devices , '-watchdog-action' , $ wdopts - > { action } if $ wdopts - > { action } ;
2011-09-08 11:39:56 +02:00
}
2011-08-23 07:47:04 +02:00
my $ vollist = [] ;
2011-09-09 10:27:21 +02:00
my $ scsicontroller = { } ;
2012-02-01 13:25:20 +01:00
my $ ahcicontroller = { } ;
2012-07-30 14:58:40 +02:00
my $ scsihw = defined ( $ conf - > { scsihw } ) ? $ conf - > { scsihw } : $ defaults - > { scsihw } ;
2011-08-23 07:47:04 +02:00
2014-05-17 09:14:58 +02:00
# Add iscsi initiator name if available
if ( my $ initiator = get_initiator_name ( ) ) {
push @$ devices , '-iscsi' , "initiator-name=$initiator" ;
}
2020-04-08 11:24:56 +02:00
PVE::QemuConfig - > foreach_volume ( $ conf , sub {
2011-08-23 07:47:04 +02:00
my ( $ ds , $ drive ) = @ _ ;
2011-11-25 08:05:36 +01:00
if ( PVE::Storage:: parse_volume_id ( $ drive - > { file } , 1 ) ) {
2021-06-22 14:30:30 +02:00
check_volume_storage_type ( $ storecfg , $ drive - > { file } ) ;
2011-08-23 07:47:04 +02:00
push @$ vollist , $ drive - > { file } ;
2011-11-25 08:05:36 +01:00
}
2012-01-27 09:35:26 +01:00
2017-09-11 08:40:29 +02:00
# ignore efidisk here, already added in bios/fw handling code above
return if $ drive - > { interface } eq 'efidisk' ;
fix #3075: add TPM v1.2 and v2.0 support via swtpm
Starts an instance of swtpm per VM in its systemd scope, it will
terminate by itself if the VM exits, or be terminated manually if
startup fails.
Before first use, a TPM state is created via swtpm_setup. State is
stored in a 'tpmstate0' volume, treated much the same way as an efidisk.
It is migrated 'offline', the important part here is the creation of the
target volume, the actual data transfer happens via the QEMU device
state migration process.
Move-disk can only work offline, as the disk is not registered with
QEMU, so 'drive-mirror' wouldn't work. swtpm itself has no method of
moving a backing storage at runtime.
For backups, a bit of a workaround is necessary (this may later be
replaced by NBD support in swtpm): During the backup, we attach the
backing file of the TPM as a read-only drive to QEMU, so our backup
code can detect it as a block device and back it up as such, while
ensuring consistency with the rest of disk state ("snapshot" semantic).
The name for the ephemeral drive is specifically chosen as
'drive-tpmstate0-backup', diverging from our usual naming scheme with
the '-backup' suffix, to avoid it ever being treated as a regular drive
from the rest of the stack in case it gets left over after a backup for
some reason (shouldn't happen).
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
2021-10-04 17:29:20 +02:00
# similar for TPM
return if $ drive - > { interface } eq 'tpmstate' ;
2017-09-11 08:40:29 +02:00
2011-08-23 07:47:04 +02:00
$ use_virtio = 1 if $ ds =~ m/^virtio/ ;
2011-12-07 11:54:31 +01:00
2020-10-06 15:32:15 +02:00
$ drive - > { bootindex } = $ bootorder - > { $ ds } if $ bootorder - > { $ ds } ;
2011-12-07 11:54:31 +01:00
2020-04-08 17:44:16 +02:00
if ( $ drive - > { interface } eq 'virtio' ) {
2015-03-19 11:06:11 +01:00
push @$ cmd , '-object' , "iothread,id=iothread-$ds" if $ drive - > { iothread } ;
}
2020-04-08 17:44:16 +02:00
if ( $ drive - > { interface } eq 'scsi' ) {
2012-07-30 14:58:40 +02:00
2015-03-27 03:41:54 +01:00
my ( $ maxdev , $ controller , $ controller_prefix ) = scsihw_infos ( $ conf , $ drive ) ;
2015-03-27 03:41:52 +01:00
2020-02-10 16:05:36 +01:00
die "scsi$drive->{index}: machine version 4.1~pve2 or higher is required to use more than 14 SCSI disks\n"
if $ drive - > { index } > 13 && ! & $ version_guard ( 4 , 1 , 2 ) ;
2021-10-20 17:31:45 +02:00
my $ pciaddr = print_pci_addr ( "$controller_prefix$controller" , $ bridges , $ arch , $ machine_type ) ;
2015-03-27 06:16:24 +01:00
my $ scsihw_type = $ scsihw =~ m/^virtio-scsi-single/ ? "virtio-scsi-pci" : $ scsihw ;
2015-04-01 05:11:43 +02:00
my $ iothread = '' ;
if ( $ conf - > { scsihw } && $ conf - > { scsihw } eq "virtio-scsi-single" && $ drive - > { iothread } ) {
$ iothread . = ",iothread=iothread-$controller_prefix$controller" ;
push @$ cmd , '-object' , "iothread,id=iothread-$controller_prefix$controller" ;
2016-05-03 14:00:28 +02:00
} elsif ( $ drive - > { iothread } ) {
2022-06-13 15:54:25 +02:00
log_warn (
"iothread is only valid with virtio disk or virtio-scsi-single controller, ignoring\n"
) ;
2015-04-01 05:11:43 +02:00
}
2015-04-02 06:10:54 +02:00
my $ queues = '' ;
if ( $ conf - > { scsihw } && $ conf - > { scsihw } eq "virtio-scsi-single" && $ drive - > { queues } ) {
$ queues = ",num_queues=$drive->{queues}" ;
2019-05-03 14:22:38 +02:00
}
2015-04-02 06:10:54 +02:00
2020-09-02 14:07:02 +02:00
push @$ devices , '-device' , "$scsihw_type,id=$controller_prefix$controller$pciaddr$iothread$queues"
if ! $ scsicontroller - > { $ controller } ;
2012-07-30 14:58:40 +02:00
$ scsicontroller - > { $ controller } = 1 ;
2020-04-08 17:44:16 +02:00
}
2011-12-07 11:54:31 +01:00
2012-02-01 13:25:20 +01:00
if ( $ drive - > { interface } eq 'sata' ) {
2020-04-08 17:44:16 +02:00
my $ controller = int ( $ drive - > { index } / $ PVE:: QemuServer:: Drive:: MAX_SATA_DISKS ) ;
2021-10-20 17:31:45 +02:00
my $ pciaddr = print_pci_addr ( "ahci$controller" , $ bridges , $ arch , $ machine_type ) ;
2020-09-02 14:07:02 +02:00
push @$ devices , '-device' , "ahci,id=ahci$controller,multifunction=on$pciaddr"
if ! $ ahcicontroller - > { $ controller } ;
2020-04-08 17:44:16 +02:00
$ ahcicontroller - > { $ controller } = 1 ;
2012-02-01 13:25:20 +01:00
}
2014-05-17 09:07:18 +02:00
2021-03-03 10:56:08 +01:00
my $ pbs_conf = $ pbs_backing - > { $ ds } ;
my $ pbs_name = undef ;
if ( $ pbs_conf ) {
$ pbs_name = "drive-$ds-pbs" ;
push @$ devices , '-blockdev' , print_pbs_blockdev ( $ pbs_conf , $ pbs_name ) ;
}
2021-06-21 17:33:18 +02:00
my $ drive_cmd = print_drive_commandline_full (
$ storecfg , $ vmid , $ drive , $ pbs_name , min_version ( $ kvmver , 6 , 0 ) ) ;
2021-04-26 14:11:03 +02:00
# extra protection for templates, but SATA and IDE don't support it..
2021-06-04 11:47:44 +02:00
$ drive_cmd . = ',readonly=on' if drive_is_read_only ( $ conf , $ drive ) ;
2020-08-06 13:13:48 +02:00
2014-05-13 03:10:40 +02:00
push @$ devices , '-drive' , $ drive_cmd ;
2020-09-02 14:07:02 +02:00
push @$ devices , '-device' , print_drivedevice_full (
$ storecfg , $ conf , $ vmid , $ drive , $ bridges , $ arch , $ machine_type ) ;
2011-08-23 07:47:04 +02:00
} ) ;
2012-01-28 11:02:28 +01:00
for ( my $ i = 0 ; $ i < $ MAX_NETS ; $ i + + ) {
2020-10-06 15:32:15 +02:00
my $ netname = "net$i" ;
next if ! $ conf - > { $ netname } ;
my $ d = parse_net ( $ conf - > { $ netname } ) ;
2020-10-06 15:32:12 +02:00
next if ! $ d ;
2022-11-13 13:38:55 +01:00
# save the MAC addr here (could be auto-gen. in some odd setups) for FDB registering later?
2011-08-23 07:47:04 +02:00
2020-10-06 15:32:12 +02:00
$ use_virtio = 1 if $ d - > { model } eq 'virtio' ;
2011-08-23 07:47:04 +02:00
2020-10-06 15:32:15 +02:00
$ d - > { bootindex } = $ bootorder - > { $ netname } if $ bootorder - > { $ netname } ;
2011-08-23 07:47:04 +02:00
2020-10-06 15:32:15 +02:00
my $ netdevfull = print_netdev_full ( $ vmid , $ conf , $ arch , $ d , $ netname ) ;
2020-10-06 15:32:12 +02:00
push @$ devices , '-netdev' , $ netdevfull ;
2012-08-20 11:10:24 +02:00
2020-10-06 15:32:12 +02:00
my $ netdevicefull = print_netdevice_full (
2022-11-13 15:37:35 +01:00
$ vmid , $ conf , $ d , $ netname , $ bridges , $ use_old_bios_files , $ arch , $ machine_type , $ machine_version ) ;
2020-09-02 14:07:02 +02:00
2020-10-06 15:32:12 +02:00
push @$ devices , '-device' , $ netdevicefull ;
2012-08-20 11:10:24 +02:00
}
2011-08-23 07:47:04 +02:00
2019-02-22 11:38:33 +01:00
if ( $ conf - > { ivshmem } ) {
2020-09-02 14:07:02 +02:00
my $ ivshmem = parse_property_string ( $ ivshmem_fmt , $ conf - > { ivshmem } ) ;
2019-02-26 08:09:43 +01:00
2019-02-22 11:38:33 +01:00
my $ bus ;
if ( $ q35 ) {
$ bus = print_pcie_addr ( "ivshmem" ) ;
} else {
$ bus = print_pci_addr ( "ivshmem" , $ bridges , $ arch , $ machine_type ) ;
}
2019-02-26 08:09:43 +01:00
my $ ivshmem_name = $ ivshmem - > { name } // $ vmid ;
my $ path = '/dev/shm/pve-shm-' . $ ivshmem_name ;
2019-02-22 11:38:33 +01:00
push @$ devices , '-device' , "ivshmem-plain,memdev=ivshmem$bus," ;
2020-09-02 14:07:02 +02:00
push @$ devices , '-object' , "memory-backend-file,id=ivshmem,share=on,mem-path=$path"
. ",size=$ivshmem->{size}M" ;
2019-02-22 11:38:33 +01:00
}
2020-01-31 15:41:22 +01:00
# pci.4 is nested in pci.1
$ bridges - > { 1 } = 1 if $ bridges - > { 4 } ;
2021-10-20 17:31:45 +02:00
if ( ! $ q35 ) { # add pci bridges
if ( min_version ( $ machine_version , 2 , 3 ) ) {
2015-03-17 09:47:10 +01:00
$ bridges - > { 1 } = 1 ;
$ bridges - > { 2 } = 1 ;
}
2015-03-27 03:41:52 +01:00
$ bridges - > { 3 } = 1 if $ scsihw =~ m/^virtio-scsi-single/ ;
2020-01-31 15:41:22 +01:00
}
for my $ k ( sort { $ b cmp $ a } keys %$ bridges ) {
next if $ q35 && $ k < 4 ; # q35.cfg already includes bridges up to 3
2020-06-18 16:36:54 +02:00
my $ k_name = $ k ;
if ( $ k == 2 && $ legacy_igd ) {
$ k_name = "$k-igd" ;
}
2021-10-20 17:31:45 +02:00
my $ pciaddr = print_pci_addr ( "pci.$k_name" , undef , $ arch , $ machine_type ) ;
2020-01-31 15:41:22 +01:00
my $ devstr = "pci-bridge,id=pci.$k,chassis_nr=$k$pciaddr" ;
2021-10-20 17:31:45 +02:00
if ( $ q35 ) { # add after -readconfig pve-q35.cfg
2020-01-31 15:41:22 +01:00
splice @$ devices , 2 , 0 , '-device' , $ devstr ;
} else {
unshift @$ devices , '-device' , $ devstr if $ k > 0 ;
2014-05-26 09:49:56 +02:00
}
2011-09-12 12:26:00 +02:00
}
Use 'QEMU version' -> '+pve-version' mapping for machine types
The previously introduced approach can fail for pinned versions when a
new QEMU release is introduced. The saner approach is to use a mapping
that gives one pve-version for each QEMU release.
Fortunately, the old system has not been bumped yet, so we can still
change it without too much effort.
QEMU versions without a mapping are assumed to be pve0, 4.1 is mapped to
pve1 since that's what we had as our default previously.
Pinned machine versions (i.e. pc-i440fx-4.1) are always assumed to be
pve0, for specific pve-versions they'd have to be pinned as well (i.e.
pc-i440fx-4.1+pve1).
The new logic also makes the pve-version dynamic, and starts VMs with
the lowest possible 'feature-level', i.e. if a feature is only available
with 4.1+pve2, but the VM isn't using it, we still start it with
4.1+pve0.
We die if we don't support a version that is requested from us. This
allows us to use the pve-version as live-migration blocks (i.e. bumping
the version and then live-migrating a VM which uses the new feature (so
is running with the bumped version) to an outdated node will present the
user with a helpful error message and fail instead of silently modifying
the config and only failing *after* the migration).
$version_guard is introduced in config_to_command to use for features
that need to check pve-version, it automatically handles selecting the
newest necessary pve-version for the VM.
Tests have to be adjusted, since all of them now resolve to pve0 instead
of pve1. EXPECT_ERROR matching is changed to use 'eq' instead of regex
to allow special characters in error messages.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
2020-02-10 16:05:35 +01:00
if ( ! $ kvm ) {
push @$ machineFlags , 'accel=tcg' ;
}
2021-11-05 14:06:11 +01:00
push @$ machineFlags , 'smm=off' if should_disable_smm ( $ conf , $ vga ) ;
Use 'QEMU version' -> '+pve-version' mapping for machine types
The previously introduced approach can fail for pinned versions when a
new QEMU release is introduced. The saner approach is to use a mapping
that gives one pve-version for each QEMU release.
Fortunately, the old system has not been bumped yet, so we can still
change it without too much effort.
QEMU versions without a mapping are assumed to be pve0, 4.1 is mapped to
pve1 since that's what we had as our default previously.
Pinned machine versions (i.e. pc-i440fx-4.1) are always assumed to be
pve0, for specific pve-versions they'd have to be pinned as well (i.e.
pc-i440fx-4.1+pve1).
The new logic also makes the pve-version dynamic, and starts VMs with
the lowest possible 'feature-level', i.e. if a feature is only available
with 4.1+pve2, but the VM isn't using it, we still start it with
4.1+pve0.
We die if we don't support a version that is requested from us. This
allows us to use the pve-version as live-migration blocks (i.e. bumping
the version and then live-migrating a VM which uses the new feature (so
is running with the bumped version) to an outdated node will present the
user with a helpful error message and fail instead of silently modifying
the config and only failing *after* the migration).
$version_guard is introduced in config_to_command to use for features
that need to check pve-version, it automatically handles selecting the
newest necessary pve-version for the VM.
Tests have to be adjusted, since all of them now resolve to pve0 instead
of pve1. EXPECT_ERROR matching is changed to use 'eq' instead of regex
to allow special characters in error messages.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
2020-02-10 16:05:35 +01:00
my $ machine_type_min = $ machine_type ;
if ( $ add_pve_version ) {
$ machine_type_min =~ s/\+pve\d+$// ;
$ machine_type_min . = "+pve$required_pve_version" ;
}
push @$ machineFlags , "type=${machine_type_min}" ;
2012-08-20 11:10:24 +02:00
push @$ cmd , @$ devices ;
2020-04-08 17:44:16 +02:00
push @$ cmd , '-rtc' , join ( ',' , @$ rtcFlags ) if scalar ( @$ rtcFlags ) ;
push @$ cmd , '-machine' , join ( ',' , @$ machineFlags ) if scalar ( @$ machineFlags ) ;
push @$ cmd , '-global' , join ( ',' , @$ globalFlags ) if scalar ( @$ globalFlags ) ;
2012-09-26 12:42:03 +02:00
2019-03-14 17:04:48 +01:00
if ( my $ vmstate = $ conf - > { vmstate } ) {
my $ statepath = PVE::Storage:: path ( $ storecfg , $ vmstate ) ;
2019-10-22 16:31:16 +02:00
push @$ vollist , $ vmstate ;
2019-03-14 17:04:48 +01:00
push @$ cmd , '-loadstate' , $ statepath ;
2019-11-29 11:06:46 +01:00
print "activating and using '$vmstate' as vmstate\n" ;
2019-03-14 17:04:48 +01:00
}
2021-06-04 11:47:47 +02:00
if ( PVE::QemuConfig - > is_template ( $ conf ) ) {
# needed to workaround base volumes being read-only
push @$ cmd , '-snapshot' ;
}
2018-12-06 10:17:25 +01:00
# add custom args
if ( $ conf - > { args } ) {
my $ aa = PVE::Tools:: split_args ( $ conf - > { args } ) ;
push @$ cmd , @$ aa ;
}
2013-07-24 11:24:20 +02:00
return wantarray ? ( $ cmd , $ vollist , $ spice_port ) : $ cmd ;
2011-08-23 07:47:04 +02:00
}
2011-09-12 12:26:00 +02:00
2020-05-05 16:53:12 +02:00
# Sanity-check the host file configured as entropy source of a VirtIO RNG device.
#
# $source - path of the entropy source on the host.
#
# Dies if $source does not exist, or if $source is '/dev/hwrng' while the first line
# of the kernel's 'rng_current' sysfs attribute reads 'none'. Returns nothing useful.
sub check_rng_source {
    my ($source) = @_;

    # mostly relevant for /dev/hwrng, but doesn't hurt to check others too
    if (!-e $source) {
        die "cannot create VirtIO RNG device: source file '$source' doesn't exist\n";
    }

    # the remaining check only concerns the hardware RNG passthrough node
    return if $source ne '/dev/hwrng';

    my $rng_current = '/sys/devices/virtual/misc/hw_random/rng_current';
    my $selected_rng = file_read_firstline($rng_current);

    if ($selected_rng eq 'none') {
        # Needs to abort, otherwise QEMU crashes on first rng access. Note that rng_current cannot
        # be changed to 'none' manually, so once the VM is past this point, it's no longer an issue.
        die "Cannot start VM with passed-through RNG device: '/dev/hwrng' exists, but"
            . " '$rng_current' is set to 'none'. Ensure that a compatible hardware-RNG is attached"
            . " to the host.\n";
    }

    return;
}
2013-07-17 11:33:02 +02:00
# Ask the running VM $vmid (via the 'query-spice' monitor command) for its SPICE port.
#
# Returns the 'tls-port' entry if it is set, otherwise the 'port' entry.
# Dies with "no spice port\n" if neither holds a true value.
sub spice_port {
    my ($vmid) = @_;

    my $spice_info = mon_cmd($vmid, 'query-spice');

    # prefer the TLS port, fall back to the plain one
    my $port = $spice_info->{'tls-port'} || $spice_info->{'port'};
    die "no spice port\n" if !$port;

    return $port;
}
2011-10-10 16:46:54 +02:00
# Collect the IDs of devices currently present in the running VM $vmid.
#
# Returns a hash reference keyed by device ID, filled from four monitor queries:
#   - 'query-pci':   every PCI device with a 'qdev_id'; the value is 1, plus the number of
#                    devices directly behind it if it is a bridge
#   - 'query-block': block devices named 'drive-<id>' are recorded as <id>
#   - 'query-mice':  'tablet' is set if a 'QEMU HID Tablet' is listed
#   - 'qom-list' on '/machine/peripheral': entries named usb<N> or usbredirdev<N>
sub vm_devices_list {
    my ($vmid) = @_;

    my $found = {};

    # seed the work queue with the devices sitting directly on each PCI bus
    my $pci_buses = mon_cmd($vmid, 'query-pci');
    my @queue;
    foreach my $bus (@$pci_buses) {
        push @queue, @{ $bus->{devices} };
    }

    # breadth-first walk, descending into PCI bridges as they are encountered
    while (@queue) {
        my $dev = shift @queue;
        my $id = $dev->{'qdev_id'};

        $found->{$id} = 1 if $id;

        my $bridge = $dev->{'pci_bridge'};
        next if !$bridge || !$bridge->{devices};

        my @children = @{ $bridge->{devices} };
        $found->{$id} += scalar(@children);
        push @queue, @children;
    }

    my $block_devices = mon_cmd($vmid, 'query-block');
    foreach my $blk (@$block_devices) {
        my ($drive_id) = $blk->{device} =~ m/^drive-(\S+)/;
        $found->{$drive_id} = 1 if defined($drive_id);
    }

    my $mice = mon_cmd($vmid, 'query-mice');
    foreach my $mouse (@$mice) {
        next if $mouse->{name} ne 'QEMU HID Tablet';
        $found->{tablet} = 1;
        last;
    }

    # for usb devices there is no query-usb
    # but we can iterate over the entries in
    # qom-list path=/machine/peripheral
    my $peripherals = mon_cmd($vmid, 'qom-list', path => '/machine/peripheral');
    foreach my $entry (@$peripherals) {
        my $name = $entry->{name};
        $found->{$name} = 1 if $name =~ m/^usb(?:redirdev)?\d+$/;
    }

    return $found;
}
2012-01-20 11:42:03 +01:00
# Hot-plug the device $deviceid into the running VM $vmid.
#
# Parameters:
#   $storecfg     - storage configuration, passed on to the drive helpers
#   $conf         - VM configuration hash
#   $vmid         - ID of the VM
#   $deviceid     - device to plug: 'tablet', 'keyboard', usbredirdev<N>, usb<N>, virtio<N>,
#                   virtioscsi<N>/scsihw<N>, scsi<N>, net<N> or (non-q35 only) pci.<N>
#   $device       - parsed device/drive/net options for $deviceid
#   $arch         - guest architecture, passed on to the print_* helpers
#   $machine_type - machine type, passed on to the print_* helpers
#
# Returns 1 if the device is already present or was plugged successfully. Returns
# nothing if qemu_netdevadd() reports failure for a net device. Dies on any other
# failure and for device IDs that cannot be hot-plugged.
#
# For virtio, scsi and net devices the backend (drive or netdev) is added first. If
# adding or verifying the frontend device fails afterwards, the backend is removed
# again before the error is re-thrown. Otherwise a leftover 'drive-<id>' would make
# vm_devices_list() report the device as present, so later attempts would return
# early without plugging anything.
sub vm_deviceplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    my $devices_list = vm_devices_list($vmid);
    return 1 if defined($devices_list->{$deviceid});

    # add PCI bridge if we need it for the device
    qemu_add_pci_bridge($storecfg, $conf, $vmid, $deviceid, $arch, $machine_type);

    if ($deviceid eq 'tablet') {
        qemu_deviceadd($vmid, print_tabletdevice_full($conf, $arch));
    } elsif ($deviceid eq 'keyboard') {
        qemu_deviceadd($vmid, print_keyboarddevice_full($conf, $arch));
    } elsif ($deviceid =~ m/^usbredirdev(\d+)$/) {
        my $id = $1;
        qemu_spice_usbredir_chardev_add($vmid, "usbredirchardev$id");
        qemu_deviceadd($vmid, PVE::QemuServer::USB::print_spice_usbdevice($id, "xhci", $id + 1));
    } elsif ($deviceid =~ m/^usb(\d+)$/) {
        # keep the capture in a named variable, $1 is easily clobbered
        my $id = $1;
        qemu_deviceadd(
            $vmid,
            PVE::QemuServer::USB::print_usbdevice_full($conf, $deviceid, $device, {}, $id + 1),
        );
    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
        qemu_iothread_add($vmid, $deviceid, $device);

        qemu_driveadd($storecfg, $vmid, $device);
        my $devicefull = print_drivedevice_full(
            $storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);

        # both steps are guarded, so a failing device_add does not leave the drive behind
        eval {
            qemu_deviceadd($vmid, $devicefull);
            qemu_deviceaddverify($vmid, $deviceid);
        };
        if (my $err = $@) {
            eval { qemu_drivedel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
        my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : "lsi";
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $scsihw_type = $scsihw eq 'virtio-scsi-single' ? "virtio-scsi-pci" : $scsihw;

        my $devicefull = "$scsihw_type,id=$deviceid$pciaddr";

        if ($deviceid =~ m/^virtioscsi(\d+)$/ && $device->{iothread}) {
            qemu_iothread_add($vmid, $deviceid, $device);
            $devicefull .= ",iothread=iothread-$deviceid";
        }

        if ($deviceid =~ m/^virtioscsi(\d+)$/ && $device->{queues}) {
            $devicefull .= ",num_queues=$device->{queues}";
        }

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
        qemu_findorcreatescsihw($storecfg, $conf, $vmid, $device, $arch, $machine_type);
        qemu_driveadd($storecfg, $vmid, $device);

        my $devicefull = print_drivedevice_full(
            $storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);
        eval { qemu_deviceadd($vmid, $devicefull); };
        if (my $err = $@) {
            eval { qemu_drivedel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif ($deviceid =~ m/^(net)(\d+)$/) {
        return if !qemu_netdevadd($vmid, $conf, $arch, $device, $deviceid);

        # the network device is generated for the PXE machine type, which may differ from
        # the $machine_type parameter, so keep it in a separate variable
        my $pxe_machine_type = PVE::QemuServer::Machine::qemu_machine_pxe($vmid, $conf);
        my $machine_version = PVE::QemuServer::Machine::extract_version($pxe_machine_type);
        my $use_old_bios_files = undef;
        ($use_old_bios_files, $pxe_machine_type) = qemu_use_old_bios_files($pxe_machine_type);

        my $netdevicefull = print_netdevice_full(
            $vmid,
            $conf,
            $device,
            $deviceid,
            undef,
            $use_old_bios_files,
            $arch,
            $pxe_machine_type,
            $machine_version,
        );

        # all steps are guarded, so a failing device_add does not leave the netdev behind
        eval {
            qemu_deviceadd($vmid, $netdevicefull);
            qemu_deviceaddverify($vmid, $deviceid);
            qemu_set_link_status($vmid, $deviceid, !$device->{link_down});
        };
        if (my $err = $@) {
            eval { qemu_netdevdel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif (!$q35 && $deviceid =~ m/^(pci\.)(\d+)$/) {
        my $bridgeid = $2;
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $devicefull = "pci-bridge,id=pci.$bridgeid,chassis_nr=$bridgeid$pciaddr";

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);
    } else {
        die "can't hotplug device '$deviceid'\n";
    }

    return 1;
}
2014-11-25 06:58:33 +01:00
# fixme: this should raise exceptions on error!
2012-01-20 11:42:03 +01:00
# Hot-unplug the device $deviceid from the running VM $vmid.
#
# $conf is the VM configuration; it is used to look up the boot disks, the drive
# options of virtio<N>/scsi<N> devices and the 'scsihw' setting.
#
# Returns 1 if the device is not present (nothing to do) or was removed. Dies if
# $deviceid is a boot disk, if the device type cannot be unplugged, or if one of
# the called helpers dies.
sub vm_deviceunplug {
    my ($vmid, $conf, $deviceid) = @_;

    my $plugged = vm_devices_list($vmid);
    return 1 if !defined($plugged->{$deviceid});

    my $bootdisks = PVE::QemuServer::Drive::get_bootdisks($conf);
    my $is_bootdisk = grep { $_ eq $deviceid } @$bootdisks;
    die "can't unplug bootdisk '$deviceid'\n" if $is_bootdisk;

    # these are removed without waiting for the removal to be confirmed
    if ($deviceid eq 'tablet' || $deviceid eq 'keyboard' || $deviceid eq 'xhci') {
        qemu_devicedel($vmid, $deviceid);
        return 1;
    }

    if ($deviceid =~ m/^usbredirdev\d+$/) {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        return 1;
    }

    if ($deviceid =~ m/^usb\d+$/) {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        return 1;
    }

    if ($deviceid =~ m/^(virtio)(\d+)$/) {
        # parse first, the drive options are needed for the iothread cleanup below
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);
        qemu_iothread_del($vmid, $deviceid, $drive);
        return 1;
    }

    if ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        return 1;
    }

    if ($deviceid =~ m/^(scsi)(\d+)$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);
        qemu_deletescsihw($conf, $vmid, $deviceid);

        # with virtio-scsi-single the iothread is named after the per-disk controller
        if ($conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single')) {
            qemu_iothread_del($vmid, "virtioscsi$drive->{index}", $drive);
        }
        return 1;
    }

    if ($deviceid =~ m/^(net)(\d+)$/) {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_netdevdel($vmid, $deviceid);
        return 1;
    }

    die "can't unplug device '$deviceid'\n";
}
2022-11-10 15:35:58 +01:00
# Add a character device with ID $id to the running VM $vmid via the 'chardev-add'
# monitor command, using a 'spicevmc' backend of type 'usbredir'.
sub qemu_spice_usbredir_chardev_add {
    my ($vmid, $id) = @_;

    my $backend = {
        type => 'spicevmc',
        data => {
            type => "usbredir",
        },
    };

    mon_cmd($vmid, "chardev-add", id => $id, backend => $backend);
}
2012-01-20 11:42:05 +01:00
# Issue a 'device_add' monitor command for VM $vmid.
#
# $devicefull is a device string whose first token is the driver name,
# followed by comma separated key=value pairs. Prefixing it with "driver="
# makes the whole string uniform, so it can be split on '=' and ',' straight
# into the option hash that is handed to the monitor.
sub qemu_deviceadd {
    my ($vmid, $devicefull) = @_;

    my $option_string = "driver=" . $devicefull;
    my %options = split(/[=,]/, $option_string);

    mon_cmd($vmid, "device_add", %options);
}
2012-01-27 09:35:26 +01:00
2012-01-20 11:42:05 +01:00
# Issue a 'device_del' monitor command for device $deviceid of VM $vmid and
# return the monitor's reply.
sub qemu_devicedel {
    my ($vmid, $deviceid) = @_;

    my $reply = mon_cmd($vmid, "device_del", id => $deviceid);

    return $reply;
}
2015-03-19 11:06:12 +01:00
# If $device has its 'iothread' flag set, make sure an object named
# "iothread-$deviceid" exists in VM $vmid, adding one of type "iothread"
# when it is not yet present in the VM's iothread list.
sub qemu_iothread_add {
    my ($vmid, $deviceid, $device) = @_;

    if ($device->{iothread}) {
        my $known_iothreads = vm_iothreads_list($vmid);
        my $object_id = "iothread-$deviceid";
        qemu_objectadd($vmid, $object_id, "iothread") if !$known_iothreads->{$object_id};
    }
}
# If $device has its 'iothread' flag set, remove the object named
# "iothread-$deviceid" from VM $vmid, but only when it is present in the
# VM's iothread list.
sub qemu_iothread_del {
    my ($vmid, $deviceid, $device) = @_;

    if ($device->{iothread}) {
        my $known_iothreads = vm_iothreads_list($vmid);
        my $object_id = "iothread-$deviceid";
        qemu_objectdel($vmid, $object_id) if $known_iothreads->{$object_id};
    }
}
memory hotplug patch v10
This patch allows hotplugging memory DIMM modules
through a new option: dimm_memory
The dimm modules are generated from a map
dimmid size dimm_memory
dimm0 512 512 100.00 0
dimm1 512 1024 50.00 1
dimm2 512 1536 33.33 2
dimm3 512 2048 25.00 3
dimm4 512 2560 20.00 0
dimm5 512 3072 16.67 1
dimm6 512 3584 14.29 2
dimm7 512 4096 12.50 3
dimm8 512 4608 11.11 0
dimm9 512 5120 10.00 1
dimm10 512 5632 9.09 2
dimm11 512 6144 8.33 3
dimm12 512 6656 7.69 0
dimm13 512 7168 7.14 1
dimm14 512 7680 6.67 2
dimm15 512 8192 6.25 3
dimm16 512 8704 5.88 0
dimm17 512 9216 5.56 1
dimm18 512 9728 5.26 2
dimm19 512 10240 5.00 3
dimm20 512 10752 4.76 0
...
dimm241 65536 3260416 2.01 1
dimm242 65536 3325952 1.97 2
dimm243 65536 3391488 1.93 3
dimm244 65536 3457024 1.90 0
dimm245 65536 3522560 1.86 1
dimm246 65536 3588096 1.83 2
dimm247 65536 3653632 1.79 3
dimm248 65536 3719168 1.76 0
dimm249 65536 3784704 1.73 1
dimm250 65536 3850240 1.70 2
dimm251 65536 3915776 1.67 3
dimm252 65536 3981312 1.65 0
dimm253 65536 4046848 1.62 1
dimm254 65536 4112384 1.59 2
dimm255 65536 4177920 1.57 3
The maximum dimm_memory size is 4TB, which is the current QEMU limit.
If the dimm_memory value is not aligned to a memory module boundary, it is aligned up to the next module.
vmid.conf
---------
memory: 1024
numa:1
hotplug: memory
When the memory hotplug option is enabled, the minimum memory value must be 1GB, and NUMA also needs to be enabled.
We assign the first 1GB as static memory, split across the NUMA nodes.
The remaining memory is assigned to hotpluggable DIMM devices.
The static memory also needs to be 128MB aligned, so that the other DIMM devices are aligned too.
This 128MB alignment is a Linux limitation; Windows can align on a 2MB size.
NUMA needs to be aligned, as Linux guests don't boot on some setups with multiple sockets,
and Windows needs NUMA to be able to hotplug memory.
hotplug
----
qm set <vmid> -memory X (where X is bigger than current value)
unplug (not yet implemented in qemu)
------
qm set <vmid> -memory X (where X is lower than current value)
linux guest
-----------
-acpi hotplug module should be loaded in guest
-need a recent kernel. (tested with 3.10)
This can be enabled automatically by adding:
/lib/udev/rules.d/80-hotplug-cpu-mem.rules
SUBSYSTEM=="cpu", ACTION=="add", TEST=="online", ATTR{online}=="0", \
ATTR{online}="1"
SUBSYSTEM=="memory", ACTION=="add", TEST=="state", ATTR{state}=="offline", \
ATTR{state}="online"
windows guest
-------------
tested with:
- windows 2012 standard
- windows 2008 enterprise/datacenter
Signed-off-by: Alexandre Derumier <aderumier@odiso.com>
2015-01-28 06:47:24 +01:00
# Issue an 'object-add' monitor command for VM $vmid, creating an object
# with id $objectid and QOM type $qomtype. Always returns 1 (errors raised
# by mon_cmd are not caught here).
sub qemu_objectadd {
    my ($vmid, $objectid, $qomtype) = @_;

    my @arguments = (
        id => $objectid,
        "qom-type" => $qomtype,
    );
    mon_cmd($vmid, "object-add", @arguments);

    return 1;
}
# Issue an 'object-del' monitor command for VM $vmid, removing the object
# with id $objectid. Always returns 1 (errors raised by mon_cmd are not
# caught here).
sub qemu_objectdel {
    my ($vmid, $objectid) = @_;

    my @arguments = (id => $objectid);
    mon_cmd($vmid, "object-del", @arguments);

    return 1;
}
2012-01-20 11:42:05 +01:00
# Add the drive described by $device to VM $vmid through the human monitor
# command 'drive_add'.
#
# Returns 1 when the monitor reply contains "OK", dies with the reply text
# otherwise.
sub qemu_driveadd {
    my ($storecfg, $vmid, $device) = @_;

    # the io_uring flag handed to the command line builder is true only when
    # the running QEMU is at least version 6.0
    my $running_version = get_running_qemu_version($vmid);
    my $io_uring = min_version($running_version, 6, 0);

    my $drive = print_drive_commandline_full($storecfg, $vmid, $device, undef, $io_uring);
    # double every backslash, as the string is embedded in a quoted HMP argument
    $drive =~ s/\\/\\\\/g;

    my $reply = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_add auto \"$drive\"");

    # If the command succeeds qemu prints: "OK"
    die "adding drive failed: $reply\n" if $reply !~ m/OK/s;

    return 1;
}
2012-01-27 09:35:26 +01:00
2012-01-20 11:42:05 +01:00
# Remove the drive "drive-$deviceid" from VM $vmid through the human monitor
# command 'drive_del'.
#
# Returns 1 when the reply is empty (after stripping leading whitespace) or
# reports that the device was not found; dies with the reply text otherwise.
sub qemu_drivedel {
    my ($vmid, $deviceid) = @_;

    my $reply = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_del drive-$deviceid");
    $reply =~ s/^\s+//;

    # NB: device not found errors mean the drive was auto-deleted and we ignore the error
    if ($reply ne "" && $reply !~ m/Device \'.*?\' not found/s) {
        die "deleting drive $deviceid failed : $reply\n";
    }

    return 1;
}
2011-10-14 10:14:11 +02:00
2012-01-20 11:42:05 +01:00
# Poll the device list of VM $vmid until $deviceid shows up.
#
# The list is checked up to six times with a one second sleep after each
# unsuccessful check. Returns 1 as soon as the device is listed, dies if it
# never appears.
sub qemu_deviceaddverify {
    my ($vmid, $deviceid) = @_;

    my $checks_left = 6;
    while ($checks_left-- > 0) {
        my $plugged = vm_devices_list($vmid);
        return 1 if defined($plugged->{$deviceid});
        sleep 1;
    }

    die "error on hotplug device '$deviceid'\n";
}
2012-01-27 09:35:26 +01:00
2012-01-20 11:42:05 +01:00
# Poll the device list of VM $vmid until $deviceid has disappeared.
#
# device_del is asynchronous and its empty return value is not a reliable
# sign of success, so the removal has to be verified explicitly. The list is
# checked up to six times with a one second sleep after each unsuccessful
# check. Returns 1 as soon as the device is gone, dies if it stays listed.
sub qemu_devicedelverify {
    my ($vmid, $deviceid) = @_;

    my $checks_left = 6;
    while ($checks_left-- > 0) {
        my $plugged = vm_devices_list($vmid);
        return 1 if !defined($plugged->{$deviceid});
        sleep 1;
    }

    die "error on hot-unplugging device '$deviceid'\n";
}
2012-07-30 14:58:40 +02:00
# Make sure the SCSI controller that $device belongs to is plugged into VM
# $vmid.
#
# The controller id is built from the prefix and controller number reported
# by scsihw_infos(); the controller is hot-plugged only when it is missing
# from the VM's device list. Always returns 1.
sub qemu_findorcreatescsihw {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my (undef, $controller, $controller_prefix) = scsihw_infos($conf, $device);
    my $scsihwid = "$controller_prefix$controller";

    my $plugged = vm_devices_list($vmid);
    vm_deviceplug($storecfg, $conf, $vmid, $scsihwid, $device, $arch, $machine_type)
        if !defined($plugged->{$scsihwid});

    return 1;
}
2015-03-18 11:08:04 +01:00
# Unplug the SCSI controller belonging to the drive configured as $opt, once
# it is no longer needed. Always returns 1.
#
# With 'virtio-scsi-single' the controller "virtioscsi<index>" of the drive
# is unplugged unconditionally. Otherwise the VM's device list is scanned
# and the controller "scsihw<controller>" is kept as long as a listed scsi
# drive has an index below (maxdev - 1) * (controller + 1).
sub qemu_deletescsihw {
    my ($conf, $vmid, $opt) = @_;

    my $device = parse_drive($opt, $conf->{$opt});

    if ($conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single')) {
        vm_deviceunplug($vmid, $conf, "virtioscsi$device->{index}");
        return 1;
    }

    my ($maxdev, $controller) = scsihw_infos($conf, $device);

    my $plugged = vm_devices_list($vmid);
    for my $name (keys %{$plugged}) {
        next if !is_valid_drivename($name);

        my $drive = parse_drive($name, $conf->{$name});
        next if $drive->{interface} ne 'scsi';

        # a scsi drive in range is still listed, so keep the controller
        return 1 if $drive->{index} < (($maxdev - 1) * ($controller + 1));
    }

    vm_deviceunplug($vmid, $conf, "scsihw$controller");

    return 1;
}
2014-11-24 10:13:21 +01:00
# Hot-plug the PCI bridge that device id $device needs, if any.
#
# print_pci_addr() is called only for its side effect of recording the
# bridge it uses in $bridges. Nothing is plugged when no bridge was recorded
# or the recorded bridge id is below 1. Otherwise "pci.<id>" is plugged
# unless it is already in the VM's device list. Always returns 1.
sub qemu_add_pci_bridge {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my $bridges = {};
    print_pci_addr($device, $bridges, $arch, $machine_type);

    # take the last key in hash order, exactly like the former each() loop;
    # presumably a single device records at most one bridge - TODO confirm
    my $bridgeid;
    for my $id (keys %{$bridges}) {
        $bridgeid = $id;
    }

    return 1 if !defined($bridgeid) || $bridgeid < 1;

    my $bridge = "pci.$bridgeid";
    my $devices_list = vm_devices_list($vmid);

    if (!defined($devices_list->{$bridge})) {
        # vm_deviceplug() takes the device in fifth position (see the other
        # callers in this file). There is none for a bridge, so pass undef
        # explicitly; without it $arch and $machine_type shift one slot left.
        vm_deviceplug($storecfg, $conf, $vmid, $bridge, undef, $arch, $machine_type);
    }

    return 1;
}
2015-01-20 11:47:11 +01:00
# Issue a 'set_link' monitor command for VM $vmid, setting the link of the
# device named $device up when $up is true and down otherwise.
sub qemu_set_link_status {
    my ($vmid, $device, $up) = @_;

    my $link_state = $up ? JSON::true : JSON::false;

    mon_cmd($vmid, "set_link", name => $device, up => $link_state);
}
2012-01-28 11:02:29 +01:00
# Issue a 'netdev_add' monitor command for VM $vmid.
#
# The netdev string produced by print_netdev_full() is split on '=' and ','
# into an option hash. 'vhost' is converted to a JSON boolean and 'queues'
# to a number before the options are sent. Always returns 1.
sub qemu_netdevadd {
    my ($vmid, $conf, $arch, $device, $deviceid) = @_;

    my $netdev = print_netdev_full($vmid, $conf, $arch, $device, $deviceid, 1);
    my %options = split(/[=,]/, $netdev);

    if (defined($options{vhost})) {
        my $enabled = PVE::JSONSchema::parse_boolean($options{vhost});
        $options{vhost} = JSON::boolean($enabled);
    }

    $options{queues} = $options{queues} + 0 if defined($options{queues});

    mon_cmd($vmid, "netdev_add", %options);

    return 1;
}
# Issue a 'netdev_del' monitor command for VM $vmid, removing the network
# backend with id $deviceid.
sub qemu_netdevdel {
    my ($vmid, $deviceid) = @_;

    my @arguments = (id => $deviceid);

    mon_cmd($vmid, "netdev_del", @arguments);
}
2016-06-14 10:50:39 +02:00
# Replace the USB device $deviceid of VM $vmid with the one described by
# $device. Does nothing when $device is false.
#
# Steps: unplug the currently plugged device, add an xhci controller if the
# VM's device list has none, then plug the new device.
sub qemu_usb_hotplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    return if !$device;

    # the old device has to go before the new one can take its id
    vm_deviceunplug($vmid, $conf, $deviceid);

    # add the xhci controller when it is not plugged yet
    my $plugged = vm_devices_list($vmid);
    unless ($plugged->{xhci}) {
        my $pciaddr = print_pci_addr("xhci", undef, $arch, $machine_type);
        qemu_deviceadd($vmid, PVE::QemuServer::USB::print_qemu_xhci_controller($pciaddr));
    }

    # print_usbdevice_full expects the parsed device
    my $parsed = parse_usb_device($device->{host});
    $parsed->{usb3} = $device->{usb3};

    # finally plug the new device
    vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $parsed, $arch, $machine_type);
}
2014-01-07 13:32:51 +01:00
# Bring the number of vcpus of the running VM $vmid to $vcpus.
#
# maxcpus is sockets * cores from $conf; a false $vcpus means maxcpus. Dies
# when more than maxcpus are requested, when unplugging is requested on a
# machine version below 2.7, or when the number of running vcpus does not
# match the configuration before hotplugging. Raises a parameter exception
# for 'vcpus' when a single plug/unplug is not confirmed in time.
#
# On machine version 2.7 or newer, cpus are added or removed one at a time
# as "cpu<N>" devices, and $conf->{vcpus} is written back after every
# confirmed step. Older machine versions only support adding, through the
# 'cpu-add' monitor command.
sub qemu_cpu_hotplug {
    my ($vmid, $conf, $vcpus) = @_;

    my $machine_type = PVE::QemuServer::Machine::get_current_qemu_machine($vmid);

    # 'sockets' wins over the old style 'smp' setting (no longer used)
    my $sockets = $conf->{sockets} || $conf->{smp} || 1;
    my $cores = $conf->{cores} || 1;
    my $maxcpus = $sockets * $cores;

    $vcpus ||= $maxcpus;

    die "you can't add more vcpus than maxcpus\n" if $vcpus > $maxcpus;

    my $currentvcpus = $conf->{vcpus} || $maxcpus;

    if ($vcpus < $currentvcpus) {
        die "cpu hot-unplugging requires qemu version 2.7 or higher\n"
            if !PVE::QemuServer::Machine::machine_version($machine_type, 2, 7);

        for (my $cpu = $currentvcpus; $cpu > $vcpus; $cpu--) {
            qemu_devicedel($vmid, "cpu$cpu");

            # wait until the monitor reports one cpu less
            my $online;
            for (my $retry = 0; ; $retry++) {
                $online = mon_cmd($vmid, "query-cpus-fast");
                last if scalar(@{$online}) == $cpu - 1;
                raise_param_exc({ vcpus => "error unplugging cpu$cpu" }) if $retry > 5;
                sleep 1;
            }

            # update conf after each successful cpu unplug
            $conf->{vcpus} = scalar(@{$online});
            PVE::QemuConfig->write_config($vmid, $conf);
        }

        return;
    }

    my $running_before = mon_cmd($vmid, "query-cpus-fast");
    die "vcpus in running vm does not match its configuration\n"
        if scalar(@{$running_before}) != $currentvcpus;

    if (!PVE::QemuServer::Machine::machine_version($machine_type, 2, 7)) {
        # old machine versions: add cpus by id, without confirmation
        for (my $id = $currentvcpus; $id < $vcpus; $id++) {
            mon_cmd($vmid, "cpu-add", id => int($id));
        }
    } else {
        for (my $cpu = $currentvcpus + 1; $cpu <= $vcpus; $cpu++) {
            my $cpustr = print_cpu_device($conf, $cpu);
            qemu_deviceadd($vmid, $cpustr);

            # wait until the monitor reports the new cpu
            my $online;
            for (my $retry = 0; ; $retry++) {
                $online = mon_cmd($vmid, "query-cpus-fast");
                last if scalar(@{$online}) == $cpu;
                raise_param_exc({ vcpus => "error hotplugging cpu$cpu" }) if $retry > 10;
                sleep 1;
            }

            # update conf after each successful cpu hotplug
            $conf->{vcpus} = scalar(@{$online});
            PVE::QemuConfig->write_config($vmid, $conf);
        }
    }
}
2012-05-09 14:29:29 +02:00
# Issue a 'block_set_io_throttle' monitor command for drive $deviceid of VM
# $vmid. Does nothing when the VM is not running.
#
# After $vmid and $deviceid the caller passes the limits positionally, in
# exactly the order of @limit_names below. Every limit is sent as an
# integer.
sub qemu_block_set_io_throttle {
    my ($vmid, $deviceid, @limits) = @_;

    return if !check_running($vmid);

    # positional order of the limit arguments
    my @limit_names = qw(
        bps bps_rd bps_wr iops iops_rd iops_wr
        bps_max bps_rd_max bps_wr_max iops_max iops_rd_max iops_wr_max
        bps_max_length bps_rd_max_length bps_wr_max_length
        iops_max_length iops_rd_max_length iops_wr_max_length
    );

    my @throttle = map { ($limit_names[$_], int($limits[$_])) } 0 .. $#limit_names;

    mon_cmd($vmid, "block_set_io_throttle", device => $deviceid, @throttle);
}
2012-08-06 11:56:34 +02:00
# Resize volume $volid to $size through the storage layer and, when VM $vmid
# is running, issue a 'block_resize' monitor command for $deviceid.
#
# When PVE::Storage::volume_resize() returns false, the size sent to the
# monitor is 0. The size is padded up to the next multiple of 1024 before it
# is sent, and the monitor command gets a timeout of 60.
sub qemu_block_resize {
    my ($vmid, $deviceid, $storecfg, $volid, $size) = @_;

    my $running = check_running($vmid);

    my $resized = PVE::Storage::volume_resize($storecfg, $volid, $size, $running);
    $size = 0 if !$resized;

    return if !$running;

    # pad up to the next multiple of 1024
    my $remainder = $size % 1024;
    $size += 1024 - $remainder if $remainder;

    my @arguments = (
        device => $deviceid,
        size => int($size),
        timeout => 60,
    );
    mon_cmd($vmid, "block_resize", @arguments);
}
2012-09-06 10:33:34 +02:00
# Create snapshot $snap of volume $volid.
#
# The snapshot is taken by QEMU ('blockdev-snapshot-internal-sync' on
# $deviceid) when VM $vmid is running and do_snapshots_with_qemu() agrees;
# in every other case it is taken by the storage layer.
sub qemu_volume_snapshot {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $running = check_running($vmid);

    if (!$running || !do_snapshots_with_qemu($storecfg, $volid, $deviceid)) {
        PVE::Storage::volume_snapshot($storecfg, $volid, $snap);
    } else {
        mon_cmd($vmid, 'blockdev-snapshot-internal-sync', device => $deviceid, name => $snap);
    }
}
2012-09-06 10:33:40 +02:00
# Delete snapshot $snap of volume $volid.
#
# The volume only counts as in use when VM $vmid is running AND one of the
# volumes of its current configuration has $volid as its file. An in-use
# volume for which do_snapshots_with_qemu() agrees is handled by QEMU
# ('blockdev-snapshot-delete-internal-sync' on $deviceid); everything else
# goes to the storage layer, which is told whether the volume is in use.
sub qemu_volume_snapshot_delete {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $in_use = check_running($vmid);

    if ($in_use) {
        # running is not enough: the volume must be part of the current config
        $in_use = undef;

        my $conf = PVE::QemuConfig->load_config($vmid);
        my $mark_if_used = sub {
            my (undef, $drive) = @_;
            $in_use = 1 if $drive->{file} eq $volid;
        };
        PVE::QemuConfig->foreach_volume($conf, $mark_if_used);
    }

    if (!$in_use || !do_snapshots_with_qemu($storecfg, $volid, $deviceid)) {
        PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap, $in_use);
    } else {
        mon_cmd($vmid, 'blockdev-snapshot-delete-internal-sync', device => $deviceid, name => $snap);
    }
}
2014-01-14 10:33:36 +01:00
sub set_migration_caps {
snapshot: set migration caps before savevm-start
A "savevm" call (both our async variant and the upstream sync one) use
migration code internally. As such, they both expect migration
capabilities to be set.
This is usually not a problem, as the default set of capabilities is ok,
however, it leads to differing snapshot settings if one does a snapshot
after a machine has been live-migrated (as the capabilities will persist
from that), which could potentially lead to discrepancies in snapshots
(currently it seems to be fine, but it still makes sense to set them to
safeguard against future changes).
Note that we do set the "dirty-bitmaps" capability now (if
query-proxmox-support reports true), which has three effects:
1) PBS dirty-bitmaps are preserved in snapshots, enabling
fast-incremental backups to work after rollback (as long as no newer
backups exist), including for hibernate/resume
2) snapshots taken from now on, with a QEMU version supporting bitmap
migration, *might* lead to incompatibility of these snapshots with
QEMU versions that don't know about bitmaps at all (i.e. < 5.0 IIRC?)
- forward compatibility is still given, and all other capabilities we
set go back to very old versions
3) since we now explicitly disable bitmap saving if the version doesn't
report support, we avoid crashes even with not-updated QEMU versions
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
2021-03-16 17:30:23 +01:00
my ( $ vmid , $ savevm ) = @ _ ;
2014-01-09 10:58:48 +01:00
2020-10-22 17:34:20 +02:00
my $ qemu_support = eval { mon_cmd ( $ vmid , "query-proxmox-support" ) } ;
snapshot: set migration caps before savevm-start
A "savevm" call (both our async variant and the upstream sync one) use
migration code internally. As such, they both expect migration
capabilities to be set.
This is usually not a problem, as the default set of capabilities is ok,
however, it leads to differing snapshot settings if one does a snapshot
after a machine has been live-migrated (as the capabilities will persist
from that), which could potentially lead to discrepancies in snapshots
(currently it seems to be fine, but it still makes sense to set them to
safeguard against future changes).
Note that we do set the "dirty-bitmaps" capability now (if
query-proxmox-support reports true), which has three effects:
1) PBS dirty-bitmaps are preserved in snapshots, enabling
fast-incremental backups to work after rollback (as long as no newer
backups exist), including for hibernate/resume
2) snapshots taken from now on, with a QEMU version supporting bitmap
migration, *might* lead to incompatibility of these snapshots with
QEMU versions that don't know about bitmaps at all (i.e. < 5.0 IIRC?)
- forward compatibility is still given, and all other capabilities we
set go back to very old versions
3) since we now explicitly disable bitmap saving if the version doesn't
report support, we avoid crashes even with not-updated QEMU versions
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
2021-03-16 17:30:23 +01:00
my $ bitmap_prop = $ savevm ? 'pbs-dirty-bitmap-savevm' : 'pbs-dirty-bitmap-migration' ;
my $ dirty_bitmaps = $ qemu_support - > { $ bitmap_prop } ? 1 : 0 ;
2014-01-10 13:09:19 +01:00
my $ cap_ref = [] ;
2014-01-09 10:58:48 +01:00
my $ enabled_cap = {
2014-01-10 13:09:19 +01:00
"auto-converge" = > 1 ,
2015-09-25 13:50:36 +02:00
"xbzrle" = > 1 ,
2014-01-10 13:09:19 +01:00
"x-rdma-pin-all" = > 0 ,
"zero-blocks" = > 0 ,
2020-10-22 17:34:20 +02:00
"compress" = > 0 ,
snapshot: set migration caps before savevm-start
A "savevm" call (both our async variant and the upstream sync one) use
migration code internally. As such, they both expect migration
capabilities to be set.
This is usually not a problem, as the default set of capabilities is ok,
however, it leads to differing snapshot settings if one does a snapshot
after a machine has been live-migrated (as the capabilities will persist
from that), which could potentially lead to discrepancies in snapshots
(currently it seems to be fine, but it still makes sense to set them to
safeguard against future changes).
Note that we do set the "dirty-bitmaps" capability now (if
query-proxmox-support reports true), which has three effects:
1) PBS dirty-bitmaps are preserved in snapshots, enabling
fast-incremental backups to work after rollback (as long as no newer
backups exist), including for hibernate/resume
2) snapshots taken from now on, with a QEMU version supporting bitmap
migration, *might* lead to incompatibility of these snapshots with
QEMU versions that don't know about bitmaps at all (i.e. < 5.0 IIRC?)
- forward compatibility is still given, and all other capabilities we
set go back to very old versions
3) since we now explicitly disable bitmap saving if the version doesn't
report support, we avoid crashes even with not-updated QEMU versions
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
2021-03-16 17:30:23 +01:00
"dirty-bitmaps" = > $ dirty_bitmaps ,
2014-01-09 10:58:48 +01:00
} ;
2019-11-19 12:23:47 +01:00
my $ supported_capabilities = mon_cmd ( $ vmid , "query-migrate-capabilities" ) ;
2014-01-09 10:58:48 +01:00
2014-01-10 13:09:19 +01:00
for my $ supported_capability ( @$ supported_capabilities ) {
2014-02-08 20:20:55 +01:00
push @$ cap_ref , {
capability = > $ supported_capability - > { capability } ,
2014-02-10 08:03:50 +01:00
state = > $ enabled_cap - > { $ supported_capability - > { capability } } ? JSON:: true : JSON:: false ,
} ;
2014-01-09 10:58:48 +01:00
}
2019-11-19 12:23:47 +01:00
mon_cmd ( $ vmid , "migrate-set-capabilities" , capabilities = > $ cap_ref ) ;
2014-01-10 13:09:19 +01:00
}
2014-01-09 10:58:48 +01:00
2020-04-08 11:24:56 +02:00
# Collect per-volume information over the current config and all of its
# snapshots, then call $func once for every distinct volume ID.
#
# $conf  - VM config hash (its {snapshots} entries are visited as well)
# $func  - code ref, called as $func->($volid, $attributes, @param)
# @param - extra arguments passed through to $func unchanged
#
# The attribute hash passed to $func carries the keys: cdrom, replicate,
# shared, referenced_in_config, referenced_in_snapshot, size (only if the
# drive has one), is_vmstate, is_tpmstate, is_unused and drivename (only for
# valid drive names).
sub foreach_volid {
    my ($conf, $func, @param) = @_;

    my %info_of;

    my $collect = sub {
        my ($key, $drive, $snapname) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $entry = ($info_of{$volid} //= {});

        # a volume only counts as cdrom if every reference to it is a cdrom
        $entry->{cdrom} //= 1;
        $entry->{cdrom} = 0 if !drive_is_cdrom($drive);

        # replication defaults to enabled; one replicated reference is enough
        $entry->{replicate} //= 0;
        $entry->{replicate} = 1 if $drive->{replicate} // 1;

        $entry->{shared} //= 0;
        $entry->{shared} = 1 if $drive->{shared};

        $entry->{referenced_in_config} //= 0;
        if (defined($snapname)) {
            $entry->{referenced_in_snapshot}->{$snapname} = 1;
        } else {
            $entry->{referenced_in_config} = 1;
        }

        # the first non-zero size seen wins
        my $size = $drive->{size};
        $entry->{size} //= $size if $size;

        $entry->{is_vmstate} //= 0;
        $entry->{is_vmstate} = 1 if $key eq 'vmstate';

        $entry->{is_tpmstate} //= 0;
        $entry->{is_tpmstate} = 1 if $key eq 'tpmstate0';

        $entry->{is_unused} //= 0;
        $entry->{is_unused} = 1 if $key =~ /^unused\d+$/;

        $entry->{drivename} = $key if is_valid_drivename($key);
    };

    my $include_opts = {
        extra_keys => ['vmstate'],
        include_unused => 1,
    };

    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $collect);

    for my $snapname (keys %{$conf->{snapshots}}) {
        PVE::QemuConfig->foreach_volume_full(
            $conf->{snapshots}->{$snapname}, $include_opts, $collect, $snapname);
    }

    for my $volid (keys %info_of) {
        $func->($volid, $info_of{$volid}, @param);
    }
}
2015-01-07 10:02:32 +01:00
# Options that vmconfig_hotplug_pending() commits to the config right away,
# without taking any hotplug action.
my $fast_plug_option = {
    map { $_ => 1 } qw(
        lock
        name
        onboot
        shares
        startup
        description
        protection
        vmstatestorage
        hookscript
        tags
    )
};

# every cloud-init option is fast-pluggable as well
$fast_plug_option->{$_} = 1 for keys %$confdesc_cloudinit;
2014-11-19 12:59:02 +01:00
# Hotplug changes in [PENDING].
#
# $vmid      - ID of the running guest
# $conf      - guest config; modified in place and written back
# $storecfg  - storage configuration, passed on to the device plug helpers
# $selection - optional hash to only apply specified options, for
#              example: { cores => 1 } (only apply changed 'cores')
# $errors    - hash ref used to return error messages, keyed by option name
#
# Options that cannot be hotplugged are left in the pending section. Failures
# of single options are recorded in $errors and do not abort processing of
# the remaining options.
sub vmconfig_hotplug_pending {
    my ($vmid, $conf, $storecfg, $selection, $errors) = @_;

    my $defaults = load_defaults();
    my $arch = get_vm_arch($conf);
    my $machine_type = get_vm_machine($conf, undef, $arch);

    my $add_error = sub {
        my ($opt, $msg) = @_;
        $errors->{$opt} = "hotplug problem - $msg";
    };

    my $cloudinit_pending_properties = PVE::QemuServer::cloudinit_pending_properties();

    # Track, in $conf->{cloudinit}, how a cloud-init relevant option differs
    # from the state recorded there: changed or removed options keep their old
    # value, newly added ones are listed in the comma separated 'added' entry.
    my $cloudinit_record_changed = sub {
        my ($conf, $opt, $old, $new) = @_;
        return if !$cloudinit_pending_properties->{$opt};

        my $ci = ($conf->{cloudinit} //= {});

        my $recorded = $ci->{$opt};
        my %added = map { $_ => 1 } PVE::Tools::split_list(delete($ci->{added}) // '');

        if (defined($new)) {
            if (defined($old)) {
                # an existing value is being modified
                if (defined($recorded)) {
                    # the value was already not in sync
                    if ($new eq $recorded) {
                        # a value is being reverted to the cloud-init state:
                        delete $ci->{$opt};
                        delete $added{$opt};
                    } else {
                        # the value was changed multiple times, do nothing
                    }
                } elsif ($added{$opt}) {
                    # the value had been marked as added and is being changed, do nothing
                } else {
                    # the value is new, record it:
                    $ci->{$opt} = $old;
                }
            } else {
                # a new value is being added
                if (defined($recorded)) {
                    # it was already not in sync
                    if ($new eq $recorded) {
                        # a value is being reverted to the cloud-init state:
                        delete $ci->{$opt};
                        delete $added{$opt};
                    } else {
                        # the value had temporarily been removed, do nothing
                    }
                } elsif ($added{$opt}) {
                    # the value had been marked as added already, do nothing
                } else {
                    # the value is new, add it
                    $added{$opt} = 1;
                }
            }
        } elsif (!defined($old)) {
            # a non-existent value is being removed? ignore...
        } else {
            # a value is being deleted
            if (defined($recorded)) {
                # a value was already recorded, just keep it
            } elsif ($added{$opt}) {
                # the value was marked as added, remove it
                delete $added{$opt};
            } else {
                # a previously unrecorded value is being removed, record the old value:
                $ci->{$opt} = $old;
            }
        }

        my $added = join(',', sort keys %added);
        $ci->{added} = $added if length($added);
    };

    # Commit values which do not have any impact on the running VM first.
    # Note: those options cannot raise errors, so we do not care about
    # $selection and always apply them.
    my $changes = 0;
    foreach my $opt (keys %{$conf->{pending}}) { # add/change
        if ($fast_plug_option->{$opt}) {
            my $new = delete $conf->{pending}->{$opt};
            $cloudinit_record_changed->($conf, $opt, $conf->{$opt}, $new);
            $conf->{$opt} = $new;
            $changes = 1;
        }
    }

    if ($changes) {
        PVE::QemuConfig->write_config($vmid, $conf);
    }

    my $ostype = $conf->{ostype};
    my $version = extract_version($machine_type, get_running_qemu_version($vmid));
    my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
    my $usb_hotplug = $hotplug_features->{usb}
        && min_version($version, 7, 1)
        && defined($ostype) && ($ostype eq 'l26' || windows_version($ostype) > 7);

    # The configured hotplug string may be unset; default to '' so that the
    # regex checks below do not warn about an uninitialized value.
    my $current_hotplug = $conf->{hotplug} // '';

    my $cgroup = PVE::QemuServer::CGroup->new($vmid);
    my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});

    # first pass: options pending deletion
    foreach my $opt (sort keys %$pending_delete_hash) {
        next if $selection && !$selection->{$opt};
        my $force = $pending_delete_hash->{$opt}->{force};
        eval {
            if ($opt eq 'hotplug') {
                die "skip\n" if $current_hotplug =~ /memory/;
            } elsif ($opt eq 'tablet') {
                die "skip\n" if !$hotplug_features->{usb};
                if ($defaults->{tablet}) {
                    vm_deviceplug($storecfg, $conf, $vmid, 'tablet', $arch, $machine_type);
                    vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', $arch, $machine_type)
                        if $arch eq 'aarch64';
                } else {
                    vm_deviceunplug($vmid, $conf, 'tablet');
                    vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
                }
            } elsif ($opt =~ m/^usb(\d+)$/) {
                my $index = $1;
                die "skip\n" if !$usb_hotplug;
                vm_deviceunplug($vmid, $conf, "usbredirdev$index"); # if it's a spice port
                vm_deviceunplug($vmid, $conf, $opt);
            } elsif ($opt eq 'vcpus') {
                die "skip\n" if !$hotplug_features->{cpu};
                qemu_cpu_hotplug($vmid, $conf, undef);
            } elsif ($opt eq 'balloon') {
                # enabling the balloon device is not hotpluggable
                die "skip\n" if defined($conf->{balloon}) && $conf->{balloon} == 0;
                # here we reset the ballooning value to memory
                my $balloon = $conf->{memory} || $defaults->{memory};
                mon_cmd($vmid, "balloon", value => $balloon * 1024 * 1024);
            } elsif ($fast_plug_option->{$opt}) {
                # do nothing
            } elsif ($opt =~ m/^net(\d+)$/) {
                die "skip\n" if !$hotplug_features->{network};
                vm_deviceunplug($vmid, $conf, $opt);
            } elsif (is_valid_drivename($opt)) {
                die "skip\n" if !$hotplug_features->{disk} || $opt =~ m/(ide|sata)(\d+)/;
                vm_deviceunplug($vmid, $conf, $opt);
                vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
            } elsif ($opt =~ m/^memory$/) {
                die "skip\n" if !$hotplug_features->{memory};
                PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults);
            } elsif ($opt eq 'cpuunits') {
                $cgroup->change_cpu_shares(undef);
            } elsif ($opt eq 'cpulimit') {
                $cgroup->change_cpu_quota(undef, undef); # reset, cgroup module can better decide values
            } else {
                die "skip\n";
            }
        };
        if (my $err = $@) {
            $add_error->($opt, $err) if $err ne "skip\n";
        } else {
            my $old = delete $conf->{$opt};
            $cloudinit_record_changed->($conf, $opt, $old, undef);
            PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        }
    }

    # second pass: added/changed options; a cloud-init drive is deferred
    my $cloudinit_opt;
    foreach my $opt (keys %{$conf->{pending}}) {
        next if $selection && !$selection->{$opt};
        my $value = $conf->{pending}->{$opt};
        eval {
            if ($opt eq 'hotplug') {
                # skip as soon as memory hotplug is involved on either side
                die "skip\n" if $value =~ /memory/ || $current_hotplug =~ /memory/;
            } elsif ($opt eq 'tablet') {
                die "skip\n" if !$hotplug_features->{usb};
                if ($value == 1) {
                    vm_deviceplug($storecfg, $conf, $vmid, 'tablet', $arch, $machine_type);
                    vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', $arch, $machine_type)
                        if $arch eq 'aarch64';
                } elsif ($value == 0) {
                    vm_deviceunplug($vmid, $conf, 'tablet');
                    vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
                }
            } elsif ($opt =~ m/^usb(\d+)$/) {
                my $index = $1;
                die "skip\n" if !$usb_hotplug;
                my $d = eval { parse_property_string($usbdesc->{format}, $value) };
                # report a parse failure instead of plugging an empty device
                die "unable to parse USB device '$opt' - $@" if $@;
                my $id = $opt;
                if ($d->{host} eq 'spice') {
                    $id = "usbredirdev$index";
                }
                qemu_usb_hotplug($storecfg, $conf, $vmid, $id, $d, $arch, $machine_type);
            } elsif ($opt eq 'vcpus') {
                die "skip\n" if !$hotplug_features->{cpu};
                qemu_cpu_hotplug($vmid, $conf, $value);
            } elsif ($opt eq 'balloon') {
                # enabling/disabling the ballooning device is not hotpluggable
                my $old_balloon_enabled = !!(!defined($conf->{balloon}) || $conf->{balloon});
                my $new_balloon_enabled =
                    !!(!defined($conf->{pending}->{balloon}) || $conf->{pending}->{balloon});
                die "skip\n" if $old_balloon_enabled != $new_balloon_enabled;

                # allow manual ballooning if shares is set to zero
                if ((defined($conf->{shares}) && ($conf->{shares} == 0))) {
                    my $balloon = $conf->{pending}->{balloon} || $conf->{memory} || $defaults->{memory};
                    mon_cmd($vmid, "balloon", value => $balloon * 1024 * 1024);
                }
            } elsif ($opt =~ m/^net(\d+)$/) {
                # some changes can be done without hotplug
                vmconfig_update_net($storecfg, $conf, $hotplug_features->{network},
                    $vmid, $opt, $value, $arch, $machine_type);
            } elsif (is_valid_drivename($opt)) {
                die "skip\n" if $opt eq 'efidisk0' || $opt eq 'tpmstate0';
                # some changes can be done without hotplug
                my $drive = parse_drive($opt, $value);
                if (drive_is_cloudinit($drive)) {
                    $cloudinit_opt = [$opt, $drive];
                    # apply all the other changes first, then generate the cloudinit disk
                    die "skip\n";
                }
                vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
                    $vmid, $opt, $value, $arch, $machine_type);
            } elsif ($opt =~ m/^memory$/) { #dimms
                die "skip\n" if !$hotplug_features->{memory};
                $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $value);
            } elsif ($opt eq 'cpuunits') {
                my $new_cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{pending}->{$opt}); #clamp
                $cgroup->change_cpu_shares($new_cpuunits);
            } elsif ($opt eq 'cpulimit') {
                my $cpulimit = $conf->{pending}->{$opt} == 0 ? -1 : int($conf->{pending}->{$opt} * 100000);
                $cgroup->change_cpu_quota($cpulimit, 100000);
            } elsif ($opt eq 'agent') {
                vmconfig_update_agent($conf, $opt, $value);
            } else {
                die "skip\n"; # skip non-hot-pluggable options
            }
        };
        if (my $err = $@) {
            $add_error->($opt, $err) if $err ne "skip\n";
        } else {
            $cloudinit_record_changed->($conf, $opt, $conf->{$opt}, $value);
            $conf->{$opt} = $value;
            delete $conf->{pending}->{$opt};
        }
    }

    if (defined($cloudinit_opt)) {
        my ($opt, $drive) = @$cloudinit_opt;
        my $value = $conf->{pending}->{$opt};
        eval {
            # generate the image from a copy that already has the new drive set
            my $temp = {%$conf, $opt => $value};
            PVE::QemuServer::Cloudinit::apply_cloudinit_config($temp, $vmid);
            vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
                $vmid, $opt, $value, $arch, $machine_type);
        };
        if (my $err = $@) {
            $add_error->($opt, $err) if $err ne "skip\n";
        } else {
            $conf->{$opt} = $value;
            delete $conf->{pending}->{$opt};
        }
    }

    # unplug xhci controller if no usb device is left
    if ($usb_hotplug) {
        my $has_usb = 0;
        for my $i (0 .. $MAX_USB_DEVICES - 1) {
            next if !defined($conf->{"usb$i"});
            $has_usb = 1;
            last;
        }
        if (!$has_usb) {
            vm_deviceunplug($vmid, $conf, 'xhci');
        }
    }

    PVE::QemuConfig->write_config($vmid, $conf);

    if ($hotplug_features->{cloudinit} && PVE::QemuServer::Cloudinit::has_changes($conf)) {
        PVE::QemuServer::vmconfig_update_cloudinit_drive($storecfg, $conf, $vmid);
    }
}
2014-11-17 07:08:44 +01:00
2015-08-12 13:38:36 +02:00
# Free the volume behind a drive entry if that is allowed and safe.
#
# Only acts when $force is set or $key names an unused entry, and the drive
# is not a cdrom. If the guest owns the volume, the caller's
# 'Datastore.AllocateSpace' permission on the storage is checked and the
# volume is freed; this dies if the volume is still in use.
#
# Returns 1 if the drive entry can be removed from the config (also when the
# guest does not own the volume), and nothing otherwise.
sub try_deallocate_drive {
    my ($storecfg, $vmid, $conf, $key, $drive, $rpcenv, $authuser, $force) = @_;

    return if !($force || $key =~ /^unused/);
    return if drive_is_cdrom($drive, 1);

    my $volid = $drive->{file};

    # If vm is not owner of this disk, only remove it from the config
    return 1 if !vm_is_volid_owner($storecfg, $vmid, $volid);

    my $sid = PVE::Storage::parse_volume_id($volid);
    $rpcenv->check($authuser, "/storage/$sid", ['Datastore.AllocateSpace']);

    # check if the disk is really unused
    if (PVE::QemuServer::Drive::is_volume_in_use($storecfg, $conf, $key, $volid)) {
        die "unable to delete '$volid' - volume is still in use (snapshot?)\n";
    }

    PVE::Storage::vdisk_free($storecfg, $volid);

    return 1;
}
# Remove the drive $opt from a guest: without $force the drive is registered
# as unused, with $force the caller's 'VM.Config.Disk' permission is checked
# and deallocation of the volume is attempted.
sub vmconfig_delete_or_detach_drive {
    my ($vmid, $storecfg, $conf, $opt, $force) = @_;

    my $drive = parse_drive($opt, $conf->{$opt});

    my $rpcenv = PVE::RPCEnvironment::get();
    my $authuser = $rpcenv->get_user();

    # plain detach: keep the volume around as an unused disk
    return vmconfig_register_unused_drive($storecfg, $vmid, $conf, $drive) if !$force;

    $rpcenv->check_vm_perm($authuser, $vmid, undef, ['VM.Config.Disk']);
    return try_deallocate_drive($storecfg, $vmid, $conf, $opt, $drive, $rpcenv, $authuser, $force);
}
2019-10-14 10:28:38 +02:00
2014-11-17 07:08:44 +01:00
# Apply the pending section of $conf to the config itself ("cold plug") and
# write the result with PVE::QemuConfig->write_config().
#
# Parameters:
#   $vmid, $conf, $storecfg - VM ID, its parsed config and the storage config
#   $errors          - hash ref, gets one message per option that failed
#   $skip_cloud_init - if true, the cloud-init config is never regenerated here
#
# A failure for a single option is recorded and warned about; the remaining
# options are still processed.
sub vmconfig_apply_pending {
    my ($vmid, $conf, $storecfg, $errors, $skip_cloud_init) = @_;

    return if !scalar(keys %{$conf->{pending}});

    # remember (and warn about) an option that could not be applied
    my $add_apply_error = sub {
        my ($opt, $msg) = @_;
        my $err_msg = "unable to apply pending change $opt : $msg";
        $errors->{$opt} = $err_msg;
        warn $err_msg;
    };

    # cold plug

    # step 1: pending deletions
    my $deletions = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    for my $opt (sort keys %$deletions) {
        my $force = $deletions->{$opt}->{force};

        eval {
            die "internal error" if $opt =~ m/^unused/;

            vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force)
                if defined($conf->{$opt}) && is_valid_drivename($opt);
        };
        my $err = $@;
        if ($err) {
            $add_apply_error->($opt, $err);
            next;
        }

        PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        delete $conf->{$opt};
    }

    PVE::QemuConfig->cleanup_pending($conf);

    # 0 is a defined value, so the '//=' below can never switch generation on
    # when the caller asked to skip it
    my $generate_cloudinit = $skip_cloud_init ? 0 : undef;

    # step 2: pending additions/changes
    for my $opt (keys %{$conf->{pending}}) {
        next if $opt eq 'delete'; # just to be sure

        eval {
            # a drive that gets replaced is handed to vmconfig_register_unused_drive first
            vmconfig_register_unused_drive(
                $storecfg, $vmid, $conf, parse_drive($opt, $conf->{$opt}))
                if defined($conf->{$opt}) && is_valid_drivename($opt);
        };
        my $err = $@;
        if ($err) {
            $add_apply_error->($opt, $err);
            next;
        }

        if (is_valid_drivename($opt)) {
            my $pending_drive = parse_drive($opt, $conf->{pending}->{$opt});
            $generate_cloudinit //= 1 if drive_is_cloudinit($pending_drive);
        }

        $conf->{$opt} = delete $conf->{pending}->{$opt};
    }

    # write all changes at once to avoid unnecessary i/o
    PVE::QemuConfig->write_config($vmid, $conf);

    if (
        $generate_cloudinit
        && PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid)
    ) {
        # After successful generation and if there were changes to be applied, update the
        # config to drop the {cloudinit} entry.
        PVE::QemuConfig->write_config($vmid, $conf);
    }
}
2014-11-25 06:58:33 +01:00
# Apply a changed or new network device option ($opt => $value) to a VM.
#
# If the device already exists and only bridge, tag, trunks, firewall, rate or
# link state differ, the existing tap device is reconfigured and 1 is returned.
# Otherwise the old device (if any) is unplugged and the new one is plugged,
# which requires $hotplug; without it the sub dies with "skip\n".
sub vmconfig_update_net {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $newnet = parse_net($value);

    if ($conf->{$opt}) {
        my $oldnet = parse_net($conf->{$opt});

        my $needs_replug = safe_string_ne($oldnet->{model}, $newnet->{model})
            || safe_string_ne($oldnet->{macaddr}, $newnet->{macaddr})
            || safe_num_ne($oldnet->{queues}, $newnet->{queues})
            || !($newnet->{bridge} && $oldnet->{bridge}); # bridge/nat mode change

        if (!$needs_replug) {
            $opt =~ m/net(\d+)/ or die "internal error";
            my $iface = "tap${vmid}i$1";

            my $bridge_setup_changed = safe_string_ne($oldnet->{bridge}, $newnet->{bridge})
                || safe_num_ne($oldnet->{tag}, $newnet->{tag})
                || safe_string_ne($oldnet->{trunks}, $newnet->{trunks})
                || safe_num_ne($oldnet->{firewall}, $newnet->{firewall});

            if ($bridge_setup_changed) {
                PVE::Network::tap_unplug($iface);

                my @plug_args = ($iface, @$newnet{qw(bridge tag firewall trunks rate)});
                if ($have_sdn) {
                    PVE::Network::SDN::Zones::tap_plug(@plug_args);
                } else {
                    PVE::Network::tap_plug(@plug_args);
                }
            } elsif (safe_num_ne($oldnet->{rate}, $newnet->{rate})) {
                # Rate can be applied on its own but any change above needs to
                # include the rate in tap_plug since OVS resets everything.
                PVE::Network::tap_rate_limit($iface, $newnet->{rate});
            }

            qemu_set_link_status($vmid, $opt, !$newnet->{link_down})
                if safe_string_ne($oldnet->{link_down}, $newnet->{link_down});

            return 1;
        }

        # for non online change, we try to hot-unplug
        die "skip\n" if !$hotplug;
        vm_deviceunplug($vmid, $conf, $opt);
    }

    die "skip\n" if !$hotplug;

    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $newnet, $arch, $machine_type);
}
2021-10-06 23:58:44 +02:00
# Check whether a changed guest agent option can be taken over as is.
#
# Dies with "skip\n" if $conf has no value for $opt yet, or if any agent
# property other than the ones listed in $hotplug_options differs between the
# old and the new value. Returns nothing otherwise.
sub vmconfig_update_agent {
    my ($conf, $opt, $value) = @_;

    die "skip\n" if !$conf->{$opt};

    my $hotplug_options = { fstrim_cloned_disks => 1 };

    my $old_agent = parse_guest_agent($conf);
    my $agent = parse_guest_agent({ $opt => $value });

    # compare every non-hotpluggable property present in $from with $against
    my $assert_unchanged = sub {
        my ($from, $against) = @_;
        for my $option (keys %$from) {
            next if defined($hotplug_options->{$option});
            die "skip\n" if safe_string_ne($from->{$option}, $against->{$option});
        }
    };

    $assert_unchanged->($agent, $old_agent); # added/changed options
    $assert_unchanged->($old_agent, $agent); # removed options

    return; # either no actual change (e.g., format string reordered) or just hotpluggable changes
}
2014-11-25 11:37:37 +01:00
# Apply a changed or new drive option ($opt => $value) to a VM.
#
# For an existing drive:
#   - the media type must stay the same, otherwise the sub dies
#   - cdrom: the medium is ejected and, if a path resolves, the new one is
#     inserted; returns 1
#   - disk, same volume: dies with "skip\n" if a non-hotpluggable property
#     differs, else updates the I/O throttle if needed; returns 1
#   - disk, other volume: the old disk is unplugged and registered as unused,
#     then the new one is plugged like a new disk
# New disks are plugged via vm_deviceplug(). This dies with "skip\n" when
# $hotplug is not set or $opt matches ide/sata.
sub vmconfig_update_disk {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $drive = parse_drive($opt, $value);

    if ($conf->{$opt} && (my $old_drive = parse_drive($opt, $conf->{$opt}))) {
        my $media = $drive->{media} || 'disk';
        my $oldmedia = $old_drive->{media} || 'disk';
        die "unable to change media type\n" if $media ne $oldmedia;

        if (drive_is_cdrom($old_drive)) {
            if ($drive->{file} eq 'none') {
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive)
                    if drive_is_cloudinit($old_drive);
            } else {
                # the path is resolved before anything gets ejected
                my $path = get_iso_path($storecfg, $vmid, $drive->{file});

                # force eject if locked
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");

                mon_cmd($vmid, "blockdev-change-medium", id => "$opt", filename => "$path")
                    if $path;
            }

            return 1;
        }

        if ($drive->{file} ne $old_drive->{file}) {
            die "skip\n" if !$hotplug;

            # unplug and register as unused
            vm_deviceunplug($vmid, $conf, $opt);
            vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive);
        } else {
            # update existing disk

            # skip non hotpluggable value
            my @cold_only = qw(aio discard iothread queues cache ssd ro);
            my $cold_change =
                first { safe_string_ne($drive->{$_}, $old_drive->{$_}) } @cold_only;
            die "skip\n" if defined($cold_change);

            # apply throttle
            my @throttle_keys = qw(
                mbps mbps_rd mbps_wr
                iops iops_rd iops_wr
                mbps_max mbps_rd_max mbps_wr_max
                iops_max iops_rd_max iops_wr_max
                bps_max_length bps_rd_max_length bps_wr_max_length
                iops_max_length iops_rd_max_length iops_wr_max_length
            );
            my $throttle_change =
                first { safe_num_ne($drive->{$_}, $old_drive->{$_}) } @throttle_keys;

            if (defined($throttle_change)) {
                # mbps* values are passed on in bytes, unset limits as 0
                my $as_bytes = sub { return ($drive->{$_[0]} || 0) * 1024 * 1024; };
                my $or_zero = sub { return $drive->{$_[0]} || 0; };
                # unset burst lengths are passed on as 1
                my $or_one = sub { return $drive->{$_[0]} || 1; };

                qemu_block_set_io_throttle(
                    $vmid, "drive-$opt",
                    (map { $as_bytes->($_) } qw(mbps mbps_rd mbps_wr)),
                    (map { $or_zero->($_) } qw(iops iops_rd iops_wr)),
                    (map { $as_bytes->($_) } qw(mbps_max mbps_rd_max mbps_wr_max)),
                    (map { $or_zero->($_) } qw(iops_max iops_rd_max iops_wr_max)),
                    (map { $or_one->($_) } qw(
                        bps_max_length bps_rd_max_length bps_wr_max_length
                        iops_max_length iops_rd_max_length iops_wr_max_length
                    )),
                );
            }

            return 1;
        }
    }

    die "skip\n" if !$hotplug || $opt =~ m/(ide|sata)(\d+)/;

    # hotplug new disks
    if ($drive->{file} !~ m|^/dev/.+|) {
        PVE::Storage::activate_volumes($storecfg, [$drive->{file}]);
    }

    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $drive, $arch, $machine_type);
}
2022-06-22 13:52:04 +02:00
# Regenerate the cloud-init config of a VM and, if the VM is running, swap the
# medium of its cloud-init drive.
#
# Does nothing if the config contains no cloud-init drive. The config is only
# written back when apply_cloudinit_config() returns a true value.
sub vmconfig_update_cloudinit_drive {
    my ($storecfg, $conf, $vmid) = @_;

    my ($cloudinit_ds, $cloudinit_drive);

    # if more than one cloud-init drive is found, the one visited last is kept
    my $remember_cloudinit = sub {
        my ($ds, $drive) = @_;
        return if !PVE::QemuServer::drive_is_cloudinit($drive);
        ($cloudinit_ds, $cloudinit_drive) = ($ds, $drive);
    };
    PVE::QemuConfig->foreach_volume($conf, $remember_cloudinit);

    return if !$cloudinit_drive;

    PVE::QemuConfig->write_config($vmid, $conf)
        if PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid);

    if (PVE::QemuServer::check_running($vmid)) {
        my $path = PVE::Storage::path($storecfg, $cloudinit_drive->{file});
        if ($path) {
            # eject first, then insert the medium at $path
            mon_cmd($vmid, "eject", force => JSON::true, id => "$cloudinit_ds");
            mon_cmd(
                $vmid, "blockdev-change-medium", id => "$cloudinit_ds", filename => "$path");
        }
    }
}
2020-03-30 13:41:32 +02:00
# called in locked context by incoming migration
#
# Collect the volumes of $conf that are not cdroms, not 'tpmstate0', have a
# volume ID and live on a storage that is not marked as shared.
#
# Returns a hash ref: drive key => [volid, storeid, volname, drive, use_existing],
# where use_existing is 1 if the volid is listed in $replicated_volumes, else 0.
sub vm_migrate_get_nbd_disks {
    my ($storecfg, $conf, $replicated_volumes) = @_;

    my $local_volumes = {};

    my $collect_local = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive) || $ds eq 'tpmstate0';

        my $volid = $drive->{file} or return;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid);

        return if PVE::Storage::storage_config($storecfg, $storeid)->{shared};

        # replicated disks re-use existing state via bitmap
        $local_volumes->{$ds} = [
            $volid,
            $storeid,
            $volname,
            $drive,
            ($replicated_volumes->{$volid} ? 1 : 0),
        ];
    };
    PVE::QemuConfig->foreach_volume($conf, $collect_local);

    return $local_volumes;
}
# called in locked context by incoming migration
#
# Allocate the target volumes for the drives in $source_volumes.
#
# Parameters:
#   $storecfg       - storage configuration
#   $vmid           - ID of the VM the new volumes are allocated for
#   $source_volumes - hash ref: drive key =>
#                     [volid, storeid, volname, drive, use_existing, format]
#                     (format is optional)
#   $storagemap     - parsed storage map; a true {identity} entry means the
#                     storage IDs are used unmapped
#
# Returns a hash ref: drive key => { drivestr, volid } for newly allocated
# volumes, or { drivestr, volid, replicated => 1 } for entries flagged as
# use_existing, which are taken over without allocating anything.
sub vm_migrate_alloc_nbd_disks {
    my ($storecfg, $vmid, $source_volumes, $storagemap) = @_;

    my $nbd = {};
    foreach my $opt (sort keys %$source_volumes) {
        my ($volid, $storeid, $volname, $drive, $use_existing, $format) =
            @{$source_volumes->{$opt}};

        if ($use_existing) {
            $nbd->{$opt}->{drivestr} = print_drive($drive);
            $nbd->{$opt}->{volid} = $volid;
            $nbd->{$opt}->{replicated} = 1;
            next;
        }

        # storage mapping + volname = regular migration
        # storage mapping + format = remote migration
        # order of precedence, filtered by whether storage supports it:
        # 1. explicit requested format
        # 2. format of current volume
        # 3. default format of storage
        if (!$storagemap->{identity}) {
            $storeid = PVE::JSONSchema::map_id($storagemap, $storeid);
            my ($defFormat, $validFormats) =
                PVE::Storage::storage_default_format($storecfg, $storeid);

            if (!$format || !grep { $format eq $_ } @$validFormats) {
                # The requested format is missing or not supported by the target storage.
                # Forget it, otherwise the '//=' fallback to the storage default below
                # would never trigger for a defined-but-unsupported format.
                $format = undef;

                if ($volname) {
                    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
                    my $fileFormat = qemu_img_format($scfg, $volname);
                    $format = $fileFormat
                        if grep { $fileFormat eq $_ } @$validFormats;
                }

                $format //= $defFormat;
            }
        } else {
            # can't happen for remote migration, so $volname is always defined
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $format = qemu_img_format($scfg, $volname);
        }

        # the drive size is divided by 1024 before it is handed to vdisk_alloc
        my $size = $drive->{size} / 1024;
        my $newvolid =
            PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $format, undef, $size);

        # NOTE: $newdrive refers to the same hash as $drive, so the entry in
        # $source_volumes is updated in place
        my $newdrive = $drive;
        $newdrive->{format} = $format;
        $newdrive->{file} = $newvolid;
        my $drivestr = print_drive($newdrive);

        $nbd->{$opt}->{drivestr} = $drivestr;
        $nbd->{$opt}->{volid} = $newvolid;
    }

    return $nbd;
}
# see vm_start_nolock for parameters, additionally:
# migrate_opts:
#   storagemap = parsed storage map for allocating NBD disks
#
# Runs under PVE::QemuConfig->lock_config() for $vmid and returns its result.
# Sets $params->{resume} and, if a storage map is given, $migrate_opts->{nbd}.
sub vm_start {
    my ($storecfg, $vmid, $params, $migrate_opts) = @_;

    my $start_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid, $migrate_opts->{migratedfrom});

        if (!$params->{skiptemplate} && PVE::QemuConfig->is_template($conf)) {
            die "you can't start a vm if it's a template\n";
        }

        my $has_suspended_lock = PVE::QemuConfig->has_lock($conf, 'suspended');
        my $has_backup_lock = PVE::QemuConfig->has_lock($conf, 'backup');

        my $running = check_running($vmid, undef, $migrate_opts->{migratedfrom});

        # a backup is currently running, attempt to start the guest in the
        # existing QEMU instance
        return vm_resume($vmid) if $has_backup_lock && $running;

        # the lock check is skipped on request and for a 'suspended' lock
        if (!$params->{skiplock} && !$has_suspended_lock) {
            PVE::QemuConfig->check_lock($conf);
        }

        $params->{resume} = $has_suspended_lock || defined($conf->{vmstate});

        die "VM $vmid already running\n" if $running;

        my $storagemap = $migrate_opts->{storagemap};
        if ($storagemap) {
            my $disks = vm_migrate_get_nbd_disks(
                $storecfg, $conf, $migrate_opts->{replicated_volumes});
            my $nbd = vm_migrate_alloc_nbd_disks($storecfg, $vmid, $disks, $storagemap);
            $migrate_opts->{nbd} = $nbd;

            # use the drive strings of the allocated volumes for this start
            $conf->{$_} = $nbd->{$_}->{drivestr} for keys %$nbd;
        }

        return vm_start_nolock($storecfg, $vmid, $conf, $params, $migrate_opts);
    };

    return PVE::QemuConfig->lock_config($vmid, $start_locked);
}
2020-03-30 13:41:30 +02:00
# params:
# statefile => 'tcp', 'unix' for migration or path/volid for RAM state
# skiplock => 0/1, skip checking for config lock
2020-08-06 13:13:48 +02:00
# skiptemplate => 0/1, skip checking whether VM is template
2022-12-20 10:23:32 +01:00
# forcemachine => to force QEMU machine (rollback/migration)
2020-04-07 15:56:15 +02:00
# forcecpu => a QEMU '-cpu' argument string to override get_cpu_options
2020-03-30 13:41:30 +02:00
# timeout => in seconds
# paused => start VM in paused state (backup)
2020-03-30 13:41:31 +02:00
# resume => resume from hibernation
2021-03-03 10:56:08 +01:00
# pbs-backing => {
# sata0 => {
# repository
# snapshot
# keyfile
# archive
# },
# virtio2 => ...
# }
2020-03-30 13:41:30 +02:00
# migrate_opts:
2020-03-30 13:41:35 +02:00
# nbd => volumes for NBD exports (vm_migrate_alloc_nbd_disks)
2020-03-30 13:41:30 +02:00
# migratedfrom => source node
# spice_ticket => used for spice migration, passed via tunnel/stdin
# network => CIDR of migration network
# type => secure/insecure - tunnel over encrypted connection or plain-text
# nbd_proto_version => int, 0 for TCP, 1 for UNIX
2021-11-22 11:30:11 +01:00
# replicated_volumes => which volids should be re-used with bitmaps for nbd migration
2022-04-28 13:37:37 +02:00
# offline_volumes => new volids of offline migrated disks like tpmstate and cloudinit, not yet
# contained in config
2020-03-30 13:41:31 +02:00
sub vm_start_nolock {
my ( $ storecfg , $ vmid , $ conf , $ params , $ migrate_opts ) = @ _ ;
2011-08-23 07:47:04 +02:00
2020-03-30 13:41:31 +02:00
my $ statefile = $ params - > { statefile } ;
my $ resume = $ params - > { resume } ;
2013-02-14 11:58:53 +01:00
2020-03-30 13:41:31 +02:00
my $ migratedfrom = $ migrate_opts - > { migratedfrom } ;
my $ migration_type = $ migrate_opts - > { type } ;
2019-03-14 17:04:48 +01:00
2020-03-30 13:41:36 +02:00
my $ res = { } ;
2020-03-30 13:41:31 +02:00
# clean up leftover reboot request files
eval { clear_reboot_request ( $ vmid ) ; } ;
warn $@ if $@ ;
2011-08-23 07:47:04 +02:00
2020-03-30 13:41:31 +02:00
if ( ! $ statefile && scalar ( keys % { $ conf - > { pending } } ) ) {
vmconfig_apply_pending ( $ vmid , $ conf , $ storecfg ) ;
$ conf = PVE::QemuConfig - > load_config ( $ vmid ) ; # update/reload
}
2019-09-11 14:07:43 +02:00
2022-11-16 18:14:03 +01:00
# don't regenerate the ISO if the VM is started as part of a live migration
# this way we can reuse the old ISO with the correct config
2022-11-16 18:14:05 +01:00
if ( ! $ migratedfrom ) {
if ( PVE::QemuServer::Cloudinit:: apply_cloudinit_config ( $ conf , $ vmid ) ) {
# FIXME: apply_cloudinit_config updates $conf in this case, and it would only drop
# $conf->{cloudinit}, so we could just not do this?
# But we do it above, so for now let's be consistent.
$ conf = PVE::QemuConfig - > load_config ( $ vmid ) ; # update/reload
}
}
2022-11-16 18:14:03 +01:00
2022-04-28 13:37:37 +02:00
# override offline migrated volumes, conf is out of date still
if ( my $ offline_volumes = $ migrate_opts - > { offline_volumes } ) {
for my $ key ( sort keys $ offline_volumes - > % * ) {
my $ parsed = parse_drive ( $ key , $ conf - > { $ key } ) ;
$ parsed - > { file } = $ offline_volumes - > { $ key } ;
$ conf - > { $ key } = print_drive ( $ parsed ) ;
}
2021-11-22 11:30:11 +01:00
}
2020-03-30 13:41:31 +02:00
my $ defaults = load_defaults ( ) ;
2015-06-16 14:26:43 +02:00
2020-03-30 13:41:31 +02:00
# set environment variable useful inside network script
2022-11-17 14:33:44 +01:00
# for remote migration the config is available on the target node!
if ( ! $ migrate_opts - > { remote_node } ) {
$ ENV { PVE_MIGRATED_FROM } = $ migratedfrom ;
}
2012-09-18 09:23:47 +02:00
2020-03-30 13:41:31 +02:00
PVE::GuestHelpers:: exec_hookscript ( $ conf , $ vmid , 'pre-start' , 1 ) ;
2019-01-31 14:33:39 +01:00
2020-03-30 13:41:31 +02:00
my $ forcemachine = $ params - > { forcemachine } ;
2020-04-07 15:56:16 +02:00
my $ forcecpu = $ params - > { forcecpu } ;
2020-03-30 13:41:31 +02:00
if ( $ resume ) {
2020-04-07 15:56:16 +02:00
# enforce machine and CPU type on suspended vm to ensure HW compatibility
2020-03-30 13:41:31 +02:00
$ forcemachine = $ conf - > { runningmachine } ;
2020-04-07 15:56:16 +02:00
$ forcecpu = $ conf - > { runningcpu } ;
2020-03-30 13:41:31 +02:00
print "Resuming suspended VM\n" ;
}
2019-03-14 17:04:48 +01:00
2021-03-03 10:56:08 +01:00
my ( $ cmd , $ vollist , $ spice_port ) = config_to_command ( $ storecfg , $ vmid ,
$ conf , $ defaults , $ forcemachine , $ forcecpu , $ params - > { 'pbs-backing' } ) ;
2012-09-18 09:23:47 +02:00
2020-03-30 13:41:31 +02:00
my $ migration_ip ;
my $ get_migration_ip = sub {
my ( $ nodename ) = @ _ ;
2019-11-11 11:28:30 +01:00
2020-03-30 13:41:31 +02:00
return $ migration_ip if defined ( $ migration_ip ) ;
2019-11-11 11:28:30 +01:00
2020-03-30 13:41:31 +02:00
my $ cidr = $ migrate_opts - > { network } ;
2020-03-30 13:41:30 +02:00
2020-03-30 13:41:31 +02:00
if ( ! defined ( $ cidr ) ) {
my $ dc_conf = PVE::Cluster:: cfs_read_file ( 'datacenter.cfg' ) ;
$ cidr = $ dc_conf - > { migration } - > { network } ;
}
2019-11-11 11:28:30 +01:00
2020-03-30 13:41:31 +02:00
if ( defined ( $ cidr ) ) {
my $ ips = PVE::Network:: get_local_ip_from_cidr ( $ cidr ) ;
2019-11-11 11:28:30 +01:00
2020-03-30 13:41:31 +02:00
die "could not get IP: no address configured on local " .
"node for network '$cidr'\n" if scalar ( @$ ips ) == 0 ;
2019-11-11 11:28:30 +01:00
2020-03-30 13:41:31 +02:00
die "could not get IP: multiple addresses configured on local " .
"node for network '$cidr'\n" if scalar ( @$ ips ) > 1 ;
2019-11-11 11:28:30 +01:00
2020-03-30 13:41:31 +02:00
$ migration_ip = @$ ips [ 0 ] ;
}
2019-11-11 11:28:30 +01:00
2020-03-30 13:41:31 +02:00
$ migration_ip = PVE::Cluster:: remote_node_ip ( $ nodename , 1 )
if ! defined ( $ migration_ip ) ;
2019-11-11 11:28:30 +01:00
2020-03-30 13:41:31 +02:00
return $ migration_ip ;
} ;
2019-11-11 11:28:30 +01:00
2020-03-30 13:41:31 +02:00
if ( $ statefile ) {
if ( $ statefile eq 'tcp' ) {
2022-11-17 14:33:43 +01:00
my $ migrate = $ res - > { migrate } = { proto = > 'tcp' } ;
$ migrate - > { addr } = "localhost" ;
2020-03-30 13:41:31 +02:00
my $ datacenterconf = PVE::Cluster:: cfs_read_file ( 'datacenter.cfg' ) ;
my $ nodename = nodename ( ) ;
2016-10-31 09:42:31 +01:00
2020-03-30 13:41:31 +02:00
if ( ! defined ( $ migration_type ) ) {
if ( defined ( $ datacenterconf - > { migration } - > { type } ) ) {
$ migration_type = $ datacenterconf - > { migration } - > { type } ;
} else {
$ migration_type = 'secure' ;
2016-12-02 17:55:29 +01:00
}
2020-03-30 13:41:31 +02:00
}
2016-12-02 17:55:29 +01:00
2020-03-30 13:41:31 +02:00
if ( $ migration_type eq 'insecure' ) {
2022-11-17 14:33:43 +01:00
$ migrate - > { addr } = $ get_migration_ip - > ( $ nodename ) ;
$ migrate - > { addr } = "[$migrate->{addr}]" if Net::IP:: ip_is_ipv6 ( $ migrate - > { addr } ) ;
2020-03-30 13:41:31 +02:00
}
2016-10-31 09:42:31 +01:00
2020-03-30 13:41:31 +02:00
my $ pfamily = PVE::Tools:: get_host_address_family ( $ nodename ) ;
2022-11-17 14:33:43 +01:00
$ migrate - > { port } = PVE::Tools:: next_migrate_port ( $ pfamily ) ;
$ migrate - > { uri } = "tcp:$migrate->{addr}:$migrate->{port}" ;
push @$ cmd , '-incoming' , $ migrate - > { uri } ;
2020-03-30 13:41:31 +02:00
push @$ cmd , '-S' ;
migrate: use ssh forwarded UNIX socket tunnel
We cannot guarantee when the SSH forward Tunnel really becomes
ready. The check with the mtunnel API call did not help for this
prolem as it only checked that the SSH connection itself works and
that the destination node has quorum but the forwarded tunnel itself
was not checked.
The Forward tunnel is a different channel in the SSH connection,
independent of the SSH `qm mtunnel` channel, so the fact that the latter
works does not guarantee that our migration tunnel is up and ready.
When the node(s) were under load, or when we did parallel
migrations (migrateall), the migrate command was often started
before a tunnel was open and ready to receive data. This led to
a direct abortion of the migration and is the main cause in why
parallel migrations often leave two thirds or more VMs on the
source node.
The issue was tracked down to SSH: debugging the QEMU
process and enabling debug logging showed that the tunnel often
became available and ready too late, or not at all.
Fixing the TCP forward tunnel is quirky and not straight ahead, the
only way SSH gives as a possibility is to use -N (no command)
-f (background) and -o "ExitOnForwardFailure=yes", then it would
wait in the foreground until the tunnel is ready and only then
background itself. This is not quite the nicest way for our special
use case and our code base.
Waiting for the local port to become open and ready (through
/proc/net/tcp[6]) as a proof of concept is not enough, even if the
port is in the listening state and should theoretically accept
connections this still failed often as the tunnel was not yet fully
ready.
Further another problem would still be open if we tried to patch the
SSH Forward method we currently use - which we solve for free with
the approach of this patch - namely the problem that the method
to get an available port (next_migration_port) has a serious race
condition which could lead to multiple use of the same port on a
parallel migration (I observed this in my many tests; seldom, but if
it happens it's really bad).
So let's now use UNIX sockets, which ssh supports since version 5.7.
The end points are UNIX socket bound to the VMID - thus no port so
no race and also no limitation of available ports (we reserved 50 for
migration).
The endpoints get created in /run/qemu-server/VMID.migrate and as
KVM/QEMU in current versions is able to use UNIX socket just as well
as TCP we have not to change much on the interaction with QEMU.
QEMU is started with the migrate_incoming url at the local
destination endpoint and creates the socket file, we then create
a listening socket on the source side and connect over SSH to the
destination.
Now the migration can be started by issuing the migrate qmp command
with an updated uri.
This breaks live migration from new to old, but *not* from old to
new, so there is an upgrade path.
If a live migration from new to old must be made (for whatever
reason), use the unsecure_migration setting (man datacenter.conf)
to allow this, although that should only be done in trusted network.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
2016-06-03 11:32:00 +02:00
2020-03-30 13:41:31 +02:00
} elsif ( $ statefile eq 'unix' ) {
# should be the default for secure migrations, as an SSH TCP forward
# tunnel is not reliably ready at a deterministic time and regularly fails
# to set up in time, so use UNIX socket forwards
2022-11-17 14:33:43 +01:00
my $ migrate = $ res - > { migrate } = { proto = > 'unix' } ;
$ migrate - > { addr } = "/run/qemu-server/$vmid.migrate" ;
unlink $ migrate - > { addr } ;
2016-06-03 15:59:15 +02:00
2022-11-17 14:33:43 +01:00
$ migrate - > { uri } = "unix:$migrate->{addr}" ;
push @$ cmd , '-incoming' , $ migrate - > { uri } ;
2020-03-30 13:41:31 +02:00
push @$ cmd , '-S' ;
migrate: use ssh forwarded UNIX socket tunnel
We cannot guarantee when the SSH forward Tunnel really becomes
ready. The check with the mtunnel API call did not help for this
prolem as it only checked that the SSH connection itself works and
that the destination node has quorum but the forwarded tunnel itself
was not checked.
The Forward tunnel is a different channel in the SSH connection,
independent of the SSH `qm mtunnel` channel, so the fact that the latter
works does not guarantee that our migration tunnel is up and ready.
When the node(s) were under load, or when we did parallel
migrations (migrateall), the migrate command was often started
before a tunnel was open and ready to receive data. This led to
a direct abortion of the migration and is the main cause in why
parallel migrations often leave two thirds or more VMs on the
source node.
The issue was tracked down to SSH: debugging the QEMU
process and enabling debug logging showed that the tunnel often
became available and ready too late, or not at all.
Fixing the TCP forward tunnel is quirky and not straight ahead, the
only way SSH gives as a possibility is to use -N (no command)
-f (background) and -o "ExitOnForwardFailure=yes", then it would
wait in the foreground until the tunnel is ready and only then
background itself. This is not quite the nicest way for our special
use case and our code base.
Waiting for the local port to become open and ready (through
/proc/net/tcp[6]] as a proof of concept is not enough, even if the
port is in the listening state and should theoretically accept
connections this still failed often as the tunnel was not yet fully
ready.
Further another problem would still be open if we tried to patch the
SSH Forward method we currently use - which we solve for free with
the approach of this patch - namely the problem that the method
to get an available port (next_migration_port) has a serious race
condition which could lead to multiple use of the same port on a
parallel migration (I observed this on my many test, seldom but if
it happens its really bad).
So lets now use UNIX sockets, which ssh supports since version 5.7.
The end points are UNIX socket bound to the VMID - thus no port so
no race and also no limitation of available ports (we reserved 50 for
migration).
The endpoints get created in /run/qemu-server/VMID.migrate and as
KVM/QEMU in current versions is able to use UNIX socket just as well
as TCP we have not to change much on the interaction with QEMU.
QEMU is started with the migrate_incoming url at the local
destination endpoint and creates the socket file, we then create
a listening socket on the source side and connect over SSH to the
destination.
Now the migration can be started by issuing the migrate qmp command
with an updated uri.
This breaks live migration from new to old, but *not* from old to
new, so there is an upgrade path.
If a live migration from new to old must be made (for whatever
reason), use the unsecure_migration setting (man datacenter.conf)
to allow this, although that should only be done in trusted network.
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
2016-06-03 11:32:00 +02:00
2020-03-30 13:41:31 +02:00
} elsif ( - e $ statefile ) {
push @$ cmd , '-loadstate' , $ statefile ;
} else {
my $ statepath = PVE::Storage:: path ( $ storecfg , $ statefile ) ;
push @$ vollist , $ statefile ;
push @$ cmd , '-loadstate' , $ statepath ;
}
} elsif ( $ params - > { paused } ) {
push @$ cmd , '-S' ;
}
2021-10-07 15:45:31 +02:00
my $ start_timeout = $ params - > { timeout } // config_aware_timeout ( $ conf , $ resume ) ;
my $ pci_devices = { } ; # host pci devices
2020-06-18 16:36:53 +02:00
for ( my $ i = 0 ; $ i < $ PVE:: QemuServer:: PCI:: MAX_HOSTPCI_DEVICES ; $ i + + ) {
2021-10-07 15:45:31 +02:00
my $ dev = $ conf - > { "hostpci$i" } or next ;
$ pci_devices - > { $ i } = parse_hostpci ( $ dev ) ;
}
2021-11-15 14:07:35 +01:00
# do not reserve pciid for mediated devices, sysfs will error out for duplicate assignment
my $ real_pci_devices = [ grep { ! ( defined ( $ _ - > { mdev } ) && scalar ( $ _ - > { pciid } - > @ * ) == 1 ) } values $ pci_devices - > % * ] ;
# map to a flat list of pci ids
my $ pci_id_list = [ map { $ _ - > { id } } map { $ _ - > { pciid } - > @ * } $ real_pci_devices - > @ * ] ;
2021-10-07 15:45:31 +02:00
# reserve all PCI IDs before actually doing anything with them
PVE::QemuServer::PCI:: reserve_pci_usage ( $ pci_id_list , $ vmid , $ start_timeout ) ;
eval {
2022-08-12 11:29:49 +02:00
my $ uuid ;
2021-10-07 15:45:31 +02:00
for my $ id ( sort keys %$ pci_devices ) {
my $ d = $ pci_devices - > { $ id } ;
for my $ dev ( $ d - > { pciid } - > @ * ) {
2022-08-12 11:29:49 +02:00
my $ info = PVE::QemuServer::PCI:: prepare_pci_device ( $ vmid , $ dev - > { id } , $ id , $ d - > { mdev } ) ;
# nvidia grid needs the uuid of the mdev as qemu parameter
if ( $ d - > { mdev } && ! defined ( $ uuid ) && $ info - > { vendor } eq '10de' ) {
$ uuid = PVE::QemuServer::PCI:: generate_mdev_uuid ( $ vmid , $ id ) ;
}
2021-10-07 15:45:31 +02:00
}
}
2022-08-12 11:29:49 +02:00
push @$ cmd , '-uuid' , $ uuid if defined ( $ uuid ) ;
2021-10-07 15:45:31 +02:00
} ;
if ( my $ err = $@ ) {
2022-09-20 14:50:16 +02:00
eval { cleanup_pci_devices ( $ vmid , $ conf ) } ;
2021-10-07 15:45:31 +02:00
warn $@ if $@ ;
die $ err ;
2020-03-30 13:41:31 +02:00
}
2011-08-23 07:47:04 +02:00
2020-03-30 13:41:31 +02:00
PVE::Storage:: activate_volumes ( $ storecfg , $ vollist ) ;
2011-08-23 07:47:04 +02:00
2020-03-30 13:41:31 +02:00
eval {
2021-10-18 09:40:03 +02:00
run_command ( [ '/bin/systemctl' , 'stop' , "$vmid.scope" ] , outfunc = > sub { } , errfunc = > sub { } ) ;
2020-03-30 13:41:31 +02:00
} ;
# Issues with the above 'stop' not being fully completed are extremely rare, a very low
# timeout should be more than enough here...
2022-02-23 10:03:24 +01:00
PVE::Systemd:: wait_for_unit_removed ( "$vmid.scope" , 20 ) ;
2020-03-30 13:41:31 +02:00
2022-10-07 14:41:48 +02:00
my $ cpuunits = PVE::CGroup:: clamp_cpu_shares ( $ conf - > { cpuunits } ) ;
2020-03-30 13:41:31 +02:00
my % run_params = (
timeout = > $ statefile ? undef : $ start_timeout ,
umask = > 0077 ,
noerr = > 1 ,
) ;
2011-08-23 07:47:04 +02:00
2020-03-30 13:41:31 +02:00
# when migrating, prefix QEMU output so other side can pick up any
# errors that might occur and show the user
if ( $ migratedfrom ) {
$ run_params { quiet } = 1 ;
$ run_params { logfunc } = sub { print "QEMU: $_[0]\n" } ;
}
2019-12-09 16:14:09 +01:00
2021-10-18 09:41:18 +02:00
my % systemd_properties = (
2020-03-30 13:41:31 +02:00
Slice = > 'qemu.slice' ,
use KillMode 'process' for systemd scope
KillMode 'none' is deprecated, and systemd loudly complains about that
in the journal. To avoid the warning, but keep the behaviour the same,
use KillMode 'process'.
This mode does two things differently, which we have to stop it from
doing:
* it sends SIGTERM right when the scope is cancelled (e.g. on shutdown)
-> but only to the "root" process, which in our case is the worker
instance forking QEMU, so it is already dead by the time this happens
* it sends SIGKILL to *all* children after a timeout
-> can be avoided by setting either SendSIGKILL to false, or
TimeoutStopUSec to infinity - for safety, we do both
In my testing, this replicated the previous behaviour exactly, but
without using the deprecated 'none' mode.
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
2021-06-21 18:35:41 +02:00
KillMode = > 'process' ,
SendSIGKILL = > 0 ,
TimeoutStopUSec = > ULONG_MAX , # infinity
2020-03-30 13:41:31 +02:00
) ;
2016-06-15 05:04:02 +02:00
2020-10-30 10:42:36 +01:00
if ( PVE::CGroup:: cgroup_mode ( ) == 2 ) {
2021-10-18 09:41:18 +02:00
$ systemd_properties { CPUWeight } = $ cpuunits ;
2020-10-30 10:42:36 +01:00
} else {
2021-10-18 09:41:18 +02:00
$ systemd_properties { CPUShares } = $ cpuunits ;
2020-10-30 10:42:36 +01:00
}
2020-03-30 13:41:31 +02:00
if ( my $ cpulimit = $ conf - > { cpulimit } ) {
2021-10-18 09:41:18 +02:00
$ systemd_properties { CPUQuota } = int ( $ cpulimit * 100 ) ;
2020-03-30 13:41:31 +02:00
}
2021-10-18 09:41:18 +02:00
$ systemd_properties { timeout } = 10 if $ statefile ; # setting up the scope shoul be quick
2016-06-15 05:04:02 +02:00
2020-03-30 13:41:31 +02:00
my $ run_qemu = sub {
PVE::Tools:: run_fork sub {
2021-10-18 09:41:18 +02:00
PVE::Systemd:: enter_systemd_scope ( $ vmid , "Proxmox VE VM $vmid" , % systemd_properties ) ;
2019-12-09 16:14:08 +01:00
fix #3075: add TPM v1.2 and v2.0 support via swtpm
Starts an instance of swtpm per VM in its systemd scope, it will
terminate by itself if the VM exits, or be terminated manually if
startup fails.
Before first use, a TPM state is created via swtpm_setup. State is
stored in a 'tpmstate0' volume, treated much the same way as an efidisk.
It is migrated 'offline', the important part here is the creation of the
target volume, the actual data transfer happens via the QEMU device
state migration process.
Move-disk can only work offline, as the disk is not registered with
QEMU, so 'drive-mirror' wouldn't work. swtpm itself has no method of
moving a backing storage at runtime.
For backups, a bit of a workaround is necessary (this may later be
replaced by NBD support in swtpm): During the backup, we attach the
backing file of the TPM as a read-only drive to QEMU, so our backup
code can detect it as a block device and back it up as such, while
ensuring consistency with the rest of disk state ("snapshot" semantic).
The name for the ephemeral drive is specifically chosen as
'drive-tpmstate0-backup', diverging from our usual naming scheme with
the '-backup' suffix, to avoid it ever being treated as a regular drive
from the rest of the stack in case it gets left over after a backup for
some reason (shouldn't happen).
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
2021-10-04 17:29:20 +02:00
my $ tpmpid ;
if ( my $ tpm = $ conf - > { tpmstate0 } ) {
# start the TPM emulator so QEMU can connect on start
$ tpmpid = start_swtpm ( $ storecfg , $ vmid , $ tpm , $ migratedfrom ) ;
}
2020-03-30 13:41:31 +02:00
my $ exitcode = run_command ( $ cmd , % run_params ) ;
fix #3075: add TPM v1.2 and v2.0 support via swtpm
Starts an instance of swtpm per VM in its systemd scope, it will
terminate by itself if the VM exits, or be terminated manually if
startup fails.
Before first use, a TPM state is created via swtpm_setup. State is
stored in a 'tpmstate0' volume, treated much the same way as an efidisk.
It is migrated 'offline', the important part here is the creation of the
target volume, the actual data transfer happens via the QEMU device
state migration process.
Move-disk can only work offline, as the disk is not registered with
QEMU, so 'drive-mirror' wouldn't work. swtpm itself has no method of
moving a backing storage at runtime.
For backups, a bit of a workaround is necessary (this may later be
replaced by NBD support in swtpm): During the backup, we attach the
backing file of the TPM as a read-only drive to QEMU, so our backup
code can detect it as a block device and back it up as such, while
ensuring consistency with the rest of disk state ("snapshot" semantic).
The name for the ephemeral drive is specifically chosen as
'drive-tpmstate0-backup', diverging from our usual naming scheme with
the '-backup' suffix, to avoid it ever being treated as a regular drive
from the rest of the stack in case it gets left over after a backup for
some reason (shouldn't happen).
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
2021-10-04 17:29:20 +02:00
if ( $ exitcode ) {
2021-10-27 08:49:37 +02:00
if ( $ tpmpid ) {
warn "stopping swtpm instance (pid $tpmpid) due to QEMU startup error\n" ;
kill 'TERM' , $ tpmpid ;
}
fix #3075: add TPM v1.2 and v2.0 support via swtpm
Starts an instance of swtpm per VM in its systemd scope, it will
terminate by itself if the VM exits, or be terminated manually if
startup fails.
Before first use, a TPM state is created via swtpm_setup. State is
stored in a 'tpmstate0' volume, treated much the same way as an efidisk.
It is migrated 'offline', the important part here is the creation of the
target volume, the actual data transfer happens via the QEMU device
state migration process.
Move-disk can only work offline, as the disk is not registered with
QEMU, so 'drive-mirror' wouldn't work. swtpm itself has no method of
moving a backing storage at runtime.
For backups, a bit of a workaround is necessary (this may later be
replaced by NBD support in swtpm): During the backup, we attach the
backing file of the TPM as a read-only drive to QEMU, so our backup
code can detect it as a block device and back it up as such, while
ensuring consistency with the rest of disk state ("snapshot" semantic).
The name for the ephemeral drive is specifically chosen as
'drive-tpmstate0-backup', diverging from our usual naming scheme with
the '-backup' suffix, to avoid it ever being treated as a regular drive
from the rest of the stack in case it gets left over after a backup for
some reason (shouldn't happen).
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
2021-10-04 17:29:20 +02:00
die "QEMU exited with code $exitcode\n" ;
}
2018-05-07 12:09:10 +02:00
} ;
2020-03-30 13:41:31 +02:00
} ;
2018-05-07 12:09:10 +02:00
2020-03-30 13:41:31 +02:00
if ( $ conf - > { hugepages } ) {
2016-06-15 05:04:02 +02:00
2020-03-30 13:41:31 +02:00
my $ code = sub {
2023-02-13 13:00:09 +01:00
my $ hotplug_features =
parse_hotplug_features ( defined ( $ conf - > { hotplug } ) ? $ conf - > { hotplug } : '1' ) ;
my $ hugepages_topology =
PVE::QemuServer::Memory:: hugepages_topology ( $ conf , $ hotplug_features - > { memory } ) ;
2020-03-30 13:41:31 +02:00
my $ hugepages_host_topology = PVE::QemuServer::Memory:: hugepages_host_topology ( ) ;
2016-06-15 05:04:02 +02:00
2020-03-30 13:41:31 +02:00
PVE::QemuServer::Memory:: hugepages_mount ( ) ;
PVE::QemuServer::Memory:: hugepages_allocate ( $ hugepages_topology , $ hugepages_host_topology ) ;
2016-06-15 05:04:02 +02:00
2018-05-07 12:09:10 +02:00
eval { $ run_qemu - > ( ) } ;
2020-03-30 13:41:31 +02:00
if ( my $ err = $@ ) {
2020-09-02 11:03:37 +02:00
PVE::QemuServer::Memory:: hugepages_reset ( $ hugepages_host_topology )
if ! $ conf - > { keephugepages } ;
2020-03-30 13:41:31 +02:00
die $ err ;
}
2016-02-19 11:13:01 +01:00
2020-09-02 11:03:37 +02:00
PVE::QemuServer::Memory:: hugepages_pre_deallocate ( $ hugepages_topology )
if ! $ conf - > { keephugepages } ;
2020-03-30 13:41:31 +02:00
} ;
eval { PVE::QemuServer::Memory:: hugepages_update_locked ( $ code ) ; } ;
2011-08-23 07:47:04 +02:00
2020-03-30 13:41:31 +02:00
} else {
eval { $ run_qemu - > ( ) } ;
}
2012-01-27 09:35:26 +01:00
2020-03-30 13:41:31 +02:00
if ( my $ err = $@ ) {
# deactivate volumes if start fails
eval { PVE::Storage:: deactivate_volumes ( $ storecfg , $ vollist ) ; } ;
2022-09-20 14:50:16 +02:00
warn $@ if $@ ;
eval { cleanup_pci_devices ( $ vmid , $ conf ) } ;
warn $@ if $@ ;
2021-10-07 15:45:31 +02:00
2020-03-30 13:41:31 +02:00
die "start failed: $err" ;
}
2012-09-19 10:40:30 +02:00
2021-10-07 15:45:31 +02:00
# re-reserve all PCI IDs now that we can know the actual VM PID
my $ pid = PVE::QemuServer::Helpers:: vm_running_locally ( $ vmid ) ;
eval { PVE::QemuServer::PCI:: reserve_pci_usage ( $ pci_id_list , $ vmid , undef , $ pid ) } ;
warn $@ if $@ ;
2022-11-17 14:33:43 +01:00
if ( defined ( $ res - > { migrate } ) ) {
print "migration listens on $res->{migrate}->{uri}\n" ;
} elsif ( $ statefile ) {
2020-03-30 13:41:31 +02:00
eval { mon_cmd ( $ vmid , "cont" ) ; } ;
warn $@ if $@ ;
}
2017-01-03 15:03:15 +01:00
2020-03-30 13:41:31 +02:00
#start nbd server for storage migration
2020-03-30 13:41:32 +02:00
if ( my $ nbd = $ migrate_opts - > { nbd } ) {
2020-03-30 13:41:31 +02:00
my $ nbd_protocol_version = $ migrate_opts - > { nbd_proto_version } // 0 ;
2017-01-03 15:03:15 +01:00
2020-03-30 13:41:31 +02:00
my $ migrate_storage_uri ;
# nbd_protocol_version > 0 for unix socket support
2022-11-17 14:33:44 +01:00
if ( $ nbd_protocol_version > 0 && ( $ migration_type eq 'secure' || $ migration_type eq 'websocket' ) ) {
2020-03-30 13:41:31 +02:00
my $ socket_path = "/run/qemu-server/$vmid\_nbd.migrate" ;
mon_cmd ( $ vmid , "nbd-server-start" , addr = > { type = > 'unix' , data = > { path = > $ socket_path } } ) ;
$ migrate_storage_uri = "nbd:unix:$socket_path" ;
2022-11-17 14:33:43 +01:00
$ res - > { migrate } - > { unix_sockets } = [ $ socket_path ] ;
2020-03-30 13:41:31 +02:00
} else {
my $ nodename = nodename ( ) ;
my $ localip = $ get_migration_ip - > ( $ nodename ) ;
my $ pfamily = PVE::Tools:: get_host_address_family ( $ nodename ) ;
my $ storage_migrate_port = PVE::Tools:: next_migrate_port ( $ pfamily ) ;
2020-09-02 14:07:02 +02:00
mon_cmd ( $ vmid , "nbd-server-start" , addr = > {
type = > 'inet' ,
data = > {
host = > "${localip}" ,
port = > "${storage_migrate_port}" ,
} ,
} ) ;
2020-03-30 13:41:31 +02:00
$ localip = "[$localip]" if Net::IP:: ip_is_ipv6 ( $ localip ) ;
$ migrate_storage_uri = "nbd:${localip}:${storage_migrate_port}" ;
2017-01-03 15:03:15 +01:00
}
2022-12-02 13:54:52 +01:00
my $ block_info = mon_cmd ( $ vmid , "query-block" ) ;
$ block_info = { map { $ _ - > { device } = > $ _ } $ block_info - > @ * } ;
2020-03-30 13:41:32 +02:00
foreach my $ opt ( sort keys %$ nbd ) {
2020-03-30 13:41:35 +02:00
my $ drivestr = $ nbd - > { $ opt } - > { drivestr } ;
my $ volid = $ nbd - > { $ opt } - > { volid } ;
2022-12-02 13:54:52 +01:00
my $ block_node = $ block_info - > { "drive-$opt" } - > { inserted } - > { 'node-name' } ;
mon_cmd (
$ vmid ,
"block-export-add" ,
id = > "drive-$opt" ,
'node-name' = > $ block_node ,
writable = > JSON:: true ,
type = > "nbd" ,
name = > "drive-$opt" , # NBD export name
) ;
2020-03-30 13:41:36 +02:00
my $ nbd_uri = "$migrate_storage_uri:exportname=drive-$opt" ;
print "storage migration listens on $nbd_uri volume:$drivestr\n" ;
2020-03-30 13:41:35 +02:00
print "re-using replicated volume: $opt - $volid\n"
if $ nbd - > { $ opt } - > { replicated } ;
2020-03-30 13:41:36 +02:00
$ res - > { drives } - > { $ opt } = $ nbd - > { $ opt } ;
$ res - > { drives } - > { $ opt } - > { nbd_uri } = $ nbd_uri ;
2020-03-30 13:41:31 +02:00
}
}
2014-01-09 10:58:48 +01:00
2020-03-30 13:41:31 +02:00
if ( $ migratedfrom ) {
eval {
set_migration_caps ( $ vmid ) ;
} ;
warn $@ if $@ ;
if ( $ spice_port ) {
print "spice listens on port $spice_port\n" ;
2020-03-30 13:41:36 +02:00
$ res - > { spice_port } = $ spice_port ;
2020-03-30 13:41:31 +02:00
if ( $ migrate_opts - > { spice_ticket } ) {
2020-09-02 14:07:02 +02:00
mon_cmd ( $ vmid , "set_password" , protocol = > 'spice' , password = >
$ migrate_opts - > { spice_ticket } ) ;
2020-03-30 13:41:31 +02:00
mon_cmd ( $ vmid , "expire_password" , protocol = > 'spice' , time = > "+30" ) ;
2013-07-24 09:52:33 +02:00
}
2020-03-30 13:41:31 +02:00
}
2013-07-24 09:52:33 +02:00
2020-03-30 13:41:31 +02:00
} else {
mon_cmd ( $ vmid , "balloon" , value = > $ conf - > { balloon } * 1024 * 1024 )
if ! $ statefile && $ conf - > { balloon } ;
2015-01-20 11:47:11 +01:00
2020-03-30 13:41:31 +02:00
foreach my $ opt ( keys %$ conf ) {
next if $ opt !~ m/^net\d+$/ ;
my $ nicconf = parse_net ( $ conf - > { $ opt } ) ;
qemu_set_link_status ( $ vmid , $ opt , 0 ) if $ nicconf - > { link_down } ;
2012-08-28 12:46:08 +02:00
}
2022-08-24 18:26:43 +02:00
add_nets_bridge_fdb ( $ conf , $ vmid ) ;
2020-03-30 13:41:31 +02:00
}
2015-03-27 06:16:24 +01:00
2023-01-16 13:46:29 +01:00
if ( ! defined ( $ conf - > { balloon } ) || $ conf - > { balloon } ) {
2023-02-23 10:49:03 +01:00
eval {
mon_cmd (
$ vmid ,
'qom-set' ,
path = > "machine/peripheral/balloon0" ,
property = > "guest-stats-polling-interval" ,
value = > 2
) ;
} ;
log_warn ( "could not set polling interval for ballooning - $@" ) if $@ ;
2023-01-16 13:46:29 +01:00
}
2015-03-09 08:22:12 +01:00
2020-03-30 13:41:31 +02:00
if ( $ resume ) {
print "Resumed VM, removing state\n" ;
if ( my $ vmstate = $ conf - > { vmstate } ) {
PVE::Storage:: deactivate_volumes ( $ storecfg , [ $ vmstate ] ) ;
PVE::Storage:: vdisk_free ( $ storecfg , $ vmstate ) ;
2019-03-14 17:04:48 +01:00
}
2020-04-07 15:56:16 +02:00
delete $ conf - > @ { qw( lock vmstate runningmachine runningcpu ) } ;
2020-03-30 13:41:31 +02:00
PVE::QemuConfig - > write_config ( $ vmid , $ conf ) ;
}
2019-03-14 17:04:48 +01:00
2020-03-30 13:41:31 +02:00
PVE::GuestHelpers:: exec_hookscript ( $ conf , $ vmid , 'post-start' ) ;
2020-03-30 13:41:36 +02:00
return $ res ;
2011-08-23 07:47:04 +02:00
}
# Build the QEMU command line for a VM, or for one of its snapshots, and
# return it as a single string produced by PVE::Tools::cmd2string().
#
# $storecfg - storage configuration, handed through to config_to_command()
# $vmid     - ID of the VM whose configuration gets loaded
# $snapname - optional snapshot name; if set, the snapshot section is used
#             as the configuration
#
# Dies if $snapname is given but the configuration has no such snapshot.
sub vm_commandline {
    my ($storecfg, $vmid, $snapname) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    my $forcemachine;
    my $forcecpu;

    if ($snapname) {
        my $snap_conf = $conf->{snapshots}->{$snapname};
        if (!defined($snap_conf)) {
            die "snapshot '$snapname' does not exist\n";
        }

        # a snapshot may override the machine type and CPU it was taken with
        ($forcemachine, $forcecpu) = ($snap_conf->{runningmachine}, $snap_conf->{runningcpu});

        # keep file digest for API
        $snap_conf->{digest} = $conf->{digest};

        $conf = $snap_conf;
    }

    my $defaults = load_defaults();

    my $cmd = config_to_command($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu);

    return PVE::Tools::cmd2string($cmd);
}
# Send a "system_reset" monitor command to the VM while holding its config lock.
#
# $vmid     - ID of the VM to reset
# $skiplock - if true, the lock check on the loaded configuration is skipped
sub vm_reset {
    my ($vmid, $skiplock) = @_;

    my $reset_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid);

        if (!$skiplock) {
            PVE::QemuConfig->check_lock($conf);
        }

        mon_cmd($vmid, "system_reset");
    };

    PVE::QemuConfig->lock_config($vmid, $reset_locked);
}
# Collect the volume IDs referenced by a VM configuration.
#
# $conf - VM configuration, iterated via foreach_volid()
#
# Returns an array reference of volume IDs. Entries starting with '/' and
# entries for which parse_volume_id() yields no storage ID are left out.
sub get_vm_volumes {
    my ($conf) = @_;

    my @volids;

    my $collect = sub {
        my ($volid) = @_;

        # absolute paths are not storage-managed volumes
        return if $volid =~ m|^/|;

        # second argument 1: presumably "no error" mode, so unparsable IDs
        # yield nothing instead of dying - TODO confirm against PVE::Storage
        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        push @volids, $volid;
    };

    foreach_volid($conf, $collect);

    return \@volids;
}
2022-09-20 14:50:16 +02:00
# Remove leftover mediated devices of a VM and drop its PCI reservation.
#
# $vmid - ID of the VM
# $conf - VM configuration; every 'hostpciN' entry is inspected
sub cleanup_pci_devices {
    my ($vmid, $conf) = @_;

    for my $opt (keys %$conf) {
        next unless $opt =~ m/^hostpci(\d+)$/;
        my $index = $1;

        my $mdev_uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $index);
        my $device = parse_hostpci($conf->{$opt});

        # only mediated devices have a sysfs entry that needs removing
        next if !$device->{mdev};

        # NOTE: avoid PVE::SysFSTools::pci_cleanup_mdev_device as it requires the PCI ID
        # and we don't want to break ABI just for this two-liner
        my $sysfs_dir = "/sys/bus/mdev/devices/$mdev_uuid";
        next if !-e $sysfs_dir;

        PVE::SysFSTools::file_write("$sysfs_dir/remove", "1");
    }

    PVE::QemuServer::PCI::remove_pci_reservation($vmid);
}
2011-11-25 08:05:36 +01:00
# Clean up after a VM has stopped. Everything runs inside an eval, so a
# failure is only reported with warn and never propagates to the caller.
#
# $storecfg              - storage configuration
# $vmid                  - ID of the stopped VM
# $conf                  - VM configuration
# $keepActive            - if true, volumes stay activated and the TPM state
#                          volume is not unmapped
# $apply_pending_changes - if true, vmconfig_apply_pending() is called last
sub vm_stop_cleanup {
    my ($storecfg, $vmid, $conf, $keepActive, $apply_pending_changes) = @_;

    eval {
        unless ($keepActive) {
            PVE::Storage::deactivate_volumes($storecfg, get_vm_volumes($conf));

            # the TPM state volume (used by swtpm) gets unmapped separately,
            # but only if it lives on a PVE storage
            if (my $tpmdrive = $conf->{tpmstate0}) {
                my $tpm = parse_drive("tpmstate0", $tpmdrive);
                my $tpm_volid = $tpm->{file};
                my ($storeid) = PVE::Storage::parse_volume_id($tpm_volid, 1);
                PVE::Storage::unmap_volume($storecfg, $tpm_volid) if $storeid;
            }
        }

        # remove the per-VM runtime files
        for my $ext (qw(mon qmp pid vnc qga)) {
            unlink "/var/run/qemu-server/${vmid}.$ext";
        }

        if ($conf->{ivshmem}) {
            my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
            my $shm_name = $ivshmem->{name} // $vmid;
            # Just delete it for now. VMs which already have this open are not
            # affected, but new VMs will get a separate one. If this becomes an
            # issue we either add some sort of ref-counting or just add a
            # "don't delete on stop" flag to the ivshmem format.
            unlink '/dev/shm/pve-shm-' . $shm_name;
        }

        cleanup_pci_devices($vmid, $conf);

        if ($apply_pending_changes) {
            vmconfig_apply_pending($vmid, $conf, $storecfg);
        }
    };
    warn $@ if $@; # avoid errors - just warn
}
2019-09-11 14:07:44 +02:00
# Stop or shut down a running VM and clean up afterwards.
# Call only in locked context.
#
# $storecfg   - storage configuration, passed on to vm_stop_cleanup()
# $vmid       - ID of the VM
# $skiplock   - if true, skip the lock check on the configuration
# $nocheck    - if true, the configuration is not loaded at all (no lock
#               check, no hookscript, no cleanup)
# $timeout    - seconds to wait for the VM to exit; defaults to the 'down'
#               value of the startup option on shutdown, else 60
# $shutdown   - if true, request a guest shutdown/powerdown instead of "quit"
# $force      - if true, escalate to SIGTERM (and later SIGKILL) instead of dying
# $keepActive - passed on to vm_stop_cleanup()
#
# Returns nothing useful. Dies if the VM could not be stopped and $force is unset.
sub _do_vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive) = @_;

    my $pid = check_running($vmid, $nocheck);
    return if !$pid;

    my $conf;
    unless ($nocheck) {
        $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->check_lock($conf) if !$skiplock;

        if (!defined($timeout) && $shutdown && $conf->{startup}) {
            my $startup = PVE::JSONSchema::pve_parse_startup_order($conf->{startup});
            $timeout = $startup->{down} if $startup->{down};
        }

        PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-stop');
    }

    # Poll once per second while the VM is still running. Returns true if the
    # given number of polls was used up, i.e. the wait timed out.
    my $wait_timed_out = sub {
        my ($limit) = @_;

        my $elapsed = 0;
        while (($elapsed < $limit) && check_running($vmid, $nocheck)) {
            $elapsed++;
            sleep 1;
        }

        return $elapsed >= $limit;
    };

    eval {
        if (!$shutdown) {
            mon_cmd($vmid, "quit");
        } elsif (defined($conf) && get_qga_key($conf, 'enabled')) {
            # guest agent is enabled, ask it to shut the guest down
            mon_cmd($vmid, "guest-shutdown", timeout => $timeout);
        } else {
            mon_cmd($vmid, "system_powerdown");
        }
    };
    my $cmd_err = $@;

    if ($cmd_err) {
        if (!check_running($vmid, $nocheck)) {
            warn "Unexpected: VM shutdown command failed, but VM not running anymore..\n";
            return;
        }

        die "VM quit/powerdown failed\n" if !$force;

        warn "VM quit/powerdown failed - terminating now with SIGTERM\n";
        kill 15, $pid;
    } else {
        $timeout //= 60;

        if (!$wait_timed_out->($timeout)) {
            # VM exited in time
            vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
            return;
        }

        die "VM quit/powerdown failed - got timeout\n" if !$force;

        warn "VM still running - terminating now with SIGTERM\n";
        kill 15, $pid;
    }

    # wait again, this time for the SIGTERM to take effect
    if ($wait_timed_out->(10)) {
        warn "VM still running - terminating now with SIGKILL\n";
        kill 9, $pid;
        sleep 1;
    }

    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
}
# Stop a VM, taking the config lock unless it is a migration leftover.
#
# Note: use $nocheck to skip tests if the VM configuration file exists.
# We need that when migrating VMs to other nodes (files already moved).
# Note: we set $keepActive in vzdump stop mode - volumes need to stay active.
#
# With $migratedfrom set, the VM process is sent SIGTERM directly and cleanup
# runs against the configuration loaded for that node, without locking.
# $force defaults to 1 when it is undefined and $shutdown is not set.
sub vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive, $migratedfrom) = @_;

    $force //= 1 if !$shutdown;

    if (!$migratedfrom) {
        my $stop_locked = sub {
            _do_vm_stop($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive);
        };
        return PVE::QemuConfig->lock_config($vmid, $stop_locked);
    }

    my $pid = check_running($vmid, $nocheck, $migratedfrom);
    if ($pid) {
        kill 15, $pid;
    }

    my $conf = PVE::QemuConfig->load_config($vmid, $migratedfrom);
    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 0);

    return;
}
2019-09-11 14:07:45 +02:00
# Reboot a running VM by filing a reboot request and shutting the VM down,
# all under the config lock.
#
# $vmid    - ID of the VM
# $timeout - shutdown timeout, passed on to _do_vm_stop()
#
# On any error the reboot request is cleared again and the error is re-thrown.
sub vm_reboot {
    my ($vmid, $timeout) = @_;

    my $reboot_locked = sub {
        eval {
            # only reboot if running, as qmeventd starts it again on a stop event
            if (check_running($vmid)) {
                create_reboot_request($vmid);

                my $storecfg = PVE::Storage::config();
                _do_vm_stop($storecfg, $vmid, undef, undef, $timeout, 1);
            }
        };
        if (my $err = $@) {
            # avoid that the next normal shutdown will be confused for a reboot
            clear_reboot_request($vmid);
            die $err;
        }
    };

    PVE::QemuConfig->lock_config($vmid, $reboot_locked);
}
2019-12-09 15:26:59 +01:00
# note: if using the statestorage parameter, the caller has to check privileges
2011-08-23 07:47:04 +02:00
sub vm_suspend {
2019-03-14 17:04:50 +01:00
my ( $ vmid , $ skiplock , $ includestate , $ statestorage ) = @ _ ;
2019-03-14 17:04:47 +01:00
my $ conf ;
my $ path ;
my $ storecfg ;
my $ vmstate ;
2011-08-23 07:47:04 +02:00
2016-03-07 12:41:12 +01:00
PVE::QemuConfig - > lock_config ( $ vmid , sub {
2011-08-23 07:47:04 +02:00
2019-03-14 17:04:47 +01:00
$ conf = PVE::QemuConfig - > load_config ( $ vmid ) ;
2011-08-23 07:47:04 +02:00
2019-03-14 17:04:47 +01:00
my $ is_backing_up = PVE::QemuConfig - > has_lock ( $ conf , 'backup' ) ;
2016-03-11 12:11:57 +01:00
PVE::QemuConfig - > check_lock ( $ conf )
2019-03-14 17:04:47 +01:00
if ! ( $ skiplock || $ is_backing_up ) ;
die "cannot suspend to disk during backup\n"
if $ is_backing_up && $ includestate ;
2012-06-26 06:42:18 +02:00
2019-03-14 17:04:47 +01:00
if ( $ includestate ) {
$ conf - > { lock } = 'suspending' ;
my $ date = strftime ( "%Y-%m-%d" , localtime ( time ( ) ) ) ;
$ storecfg = PVE::Storage:: config ( ) ;
2019-12-09 15:26:59 +01:00
if ( ! $ statestorage ) {
$ statestorage = find_vmstate_storage ( $ conf , $ storecfg ) ;
# check permissions for the storage
my $ rpcenv = PVE::RPCEnvironment:: get ( ) ;
if ( $ rpcenv - > { type } ne 'cli' ) {
my $ authuser = $ rpcenv - > get_user ( ) ;
$ rpcenv - > check ( $ authuser , "/storage/$statestorage" , [ 'Datastore.AllocateSpace' ] ) ;
}
}
2020-09-02 14:07:02 +02:00
$ vmstate = PVE::QemuConfig - > __snapshot_save_vmstate (
$ vmid , $ conf , "suspend-$date" , $ storecfg , $ statestorage , 1 ) ;
2019-03-14 17:04:47 +01:00
$ path = PVE::Storage:: path ( $ storecfg , $ vmstate ) ;
PVE::QemuConfig - > write_config ( $ vmid , $ conf ) ;
} else {
2019-11-19 12:23:47 +01:00
mon_cmd ( $ vmid , "stop" ) ;
2019-03-14 17:04:47 +01:00
}
2011-08-23 07:47:04 +02:00
} ) ;
2019-03-14 17:04:47 +01:00
if ( $ includestate ) {
# save vm state
PVE::Storage:: activate_volumes ( $ storecfg , [ $ vmstate ] ) ;
eval {
snapshot: set migration caps before savevm-start
A "savevm" call (both our async variant and the upstream sync one) use
migration code internally. As such, they both expect migration
capabilities to be set.
This is usually not a problem, as the default set of capabilities is ok,
however, it leads to differing snapshot settings if one does a snapshot
after a machine has been live-migrated (as the capabilities will persist
from that), which could potentially lead to discrepencies in snapshots
(currently it seems to be fine, but it still makes sense to set them to
safeguard against future changes).
Note that we do set the "dirty-bitmaps" capability now (if
query-proxmox-support reports true), which has three effects:
1) PBS dirty-bitmaps are preserved in snapshots, enabling
fast-incremental backups to work after rollback (as long as no newer
backups exist), including for hibernate/resume
2) snapshots taken from now on, with a QEMU version supporting bitmap
migration, *might* lead to incompatibility of these snapshots with
QEMU versions that don't know about bitmaps at all (i.e. < 5.0 IIRC?)
- forward compatibility is still given, and all other capabilities we
set go back to very old versions
3) since we now explicitly disable bitmap saving if the version doesn't
report support, we avoid crashes even with not-updated QEMU versions
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
2021-03-16 17:30:23 +01:00
set_migration_caps ( $ vmid , 1 ) ;
2019-11-19 12:23:47 +01:00
mon_cmd ( $ vmid , "savevm-start" , statefile = > $ path ) ;
2019-03-14 17:04:47 +01:00
for ( ; ; ) {
2019-11-19 12:23:47 +01:00
my $ state = mon_cmd ( $ vmid , "query-savevm" ) ;
2019-03-14 17:04:47 +01:00
if ( ! $ state - > { status } ) {
die "savevm not active\n" ;
} elsif ( $ state - > { status } eq 'active' ) {
sleep ( 1 ) ;
next ;
} elsif ( $ state - > { status } eq 'completed' ) {
2019-03-19 09:17:30 +01:00
print "State saved, quitting\n" ;
2019-03-14 17:04:47 +01:00
last ;
} elsif ( $ state - > { status } eq 'failed' && $ state - > { error } ) {
die "query-savevm failed with error '$state->{error}'\n"
} else {
die "query-savevm returned status '$state->{status}'\n" ;
}
}
} ;
my $ err = $@ ;
PVE::QemuConfig - > lock_config ( $ vmid , sub {
$ conf = PVE::QemuConfig - > load_config ( $ vmid ) ;
if ( $ err ) {
# cleanup, but leave suspending lock, to indicate something went wrong
eval {
2019-11-19 12:23:47 +01:00
mon_cmd ( $ vmid , "savevm-end" ) ;
2019-03-14 17:04:47 +01:00
PVE::Storage:: deactivate_volumes ( $ storecfg , [ $ vmstate ] ) ;
PVE::Storage:: vdisk_free ( $ storecfg , $ vmstate ) ;
2020-04-07 15:56:16 +02:00
delete $ conf - > @ { qw( vmstate runningmachine runningcpu ) } ;
2019-03-14 17:04:47 +01:00
PVE::QemuConfig - > write_config ( $ vmid , $ conf ) ;
} ;
warn $@ if $@ ;
die $ err ;
}
die "lock changed unexpectedly\n"
if ! PVE::QemuConfig - > has_lock ( $ conf , 'suspending' ) ;
2019-11-19 12:23:47 +01:00
mon_cmd ( $ vmid , "quit" ) ;
2019-03-14 17:04:47 +01:00
$ conf - > { lock } = 'suspended' ;
PVE::QemuConfig - > write_config ( $ vmid , $ conf ) ;
} ) ;
}
2011-08-23 07:47:04 +02:00
}
2022-11-21 13:16:05 +01:00
# Resume a VM through the QEMU monitor.
#
# Depending on the status reported by 'query-status' this is a no-op (already
# running), a 'system_wakeup' (suspended) or a 'cont'; a VM in 'shutdown'
# state is additionally reset first.
#
# $nocheck is set when called as part of a migration - in this context the
# location of the config file (source or target node) is not deterministic,
# since migration cannot wait for pmxcfs to process the rename
sub vm_resume {
    my ($vmid, $skiplock, $nocheck) = @_;

    # Load the VM config. With $nocheck the config may still live on the
    # source node, so several locations are tried before giving up.
    my $fetch_conf = sub {
        return PVE::QemuConfig->load_config($vmid) if !$nocheck;

        my $found = eval { PVE::QemuConfig->load_config($vmid) }; # try on target node
        return $found if !$@;

        my $vmlist = PVE::Cluster::get_vmlist();
        if (exists($vmlist->{ids}->{$vmid})) {
            my $node = $vmlist->{ids}->{$vmid}->{node};
            $found = eval { PVE::QemuConfig->load_config($vmid, $node) }; # try on source node
        }
        return $found if $found;

        PVE::Cluster::cfs_update(); # vmlist was wrong, invalidate cache
        return PVE::QemuConfig->load_config($vmid); # last try on target node again
    };

    PVE::QemuConfig->lock_config($vmid, sub {
        # the monitor is queried before the config is read
        my $reply = mon_cmd($vmid, 'query-status');
        my $conf = $fetch_conf->();

        my $status = $reply->{status};
        return if $status && $status eq 'running'; # job done, go home

        my $needs_wakeup = $status && $status eq 'suspended';
        my $needs_reset = $status && $status eq 'shutdown';
        my $resume_cmd = $needs_wakeup ? 'system_wakeup' : 'cont';

        # a 'backup' lock does not block resuming, any other lock does
        if (!$nocheck && !$skiplock && !PVE::QemuConfig->has_lock($conf, 'backup')) {
            PVE::QemuConfig->check_lock($conf);
        }

        # required if a VM shuts down during a backup and we get a resume
        # request before the backup finishes for example
        mon_cmd($vmid, "system_reset") if $needs_reset;

        add_nets_bridge_fdb($conf, $vmid) if $resume_cmd eq 'cont';

        mon_cmd($vmid, $resume_cmd);
    });
}
2011-10-10 13:17:40 +02:00
# Send a key (or key combination) to the guest while holding the VM config lock.
#
# $vmid:     ID of the VM
# $skiplock: accepted for interface compatibility, not evaluated here
# $key:      key name, passed verbatim to the monitor 'sendkey' command
#
# Dies with the monitor output if the command printed anything.
sub vm_sendkey {
    my ($vmid, $skiplock, $key) = @_;

    my $send_key = sub {
        # The config itself is not used; presumably loading it asserts that
        # the VM exists - TODO confirm against load_config().
        my $conf = PVE::QemuConfig->load_config($vmid);

        # there is no qmp command, so we use the human monitor command
        my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "sendkey $key");
        die $output if $output ne '';
    };

    PVE::QemuConfig->lock_config($vmid, $send_key);
}
2011-10-17 13:49:48 +02:00
# vzdump restore implementation
2013-01-04 06:57:11 +01:00
# Return the name of the first member listed by 'tar tf' for $archive.
#
# Dies if $archive is not a plain file, if tar cannot be started, or if tar
# produced no (true) first line. tar is sent SIGTERM as soon as the first
# line has been read, so the rest of the archive is not listed.
sub tar_archive_read_firstfile {
    my ($archive) = @_;

    if (!-f $archive) {
        die "ERROR: file '$archive' does not exist\n";
    }

    # try to detect archive type first
    my $tar_pid = open(my $listing, '-|', 'tar', 'tf', $archive);
    die "unable to open file '$archive'\n" if !$tar_pid;

    my $first_entry = <$listing>;
    kill(15, $tar_pid);
    close($listing);

    die "ERROR: archive contaions no data\n" if !$first_entry;

    chomp($first_entry);
    return $first_entry;
}
2013-01-04 06:57:11 +01:00
# Remove the temporary volumes recorded in a tar-restore stat file.
#
# $storecfg: storage configuration, passed on to PVE::Storage::vdisk_free()
# $statfile: path of the stat file; every line is expected to end in
#            'vzdump:<name>:<volid-or-absolute-path>'
#
# Absolute paths are unlinked directly, anything else is freed as a storage
# volume. Cleanup is best effort: failures are reported on STDERR and the
# remaining lines are still processed. A stat file that cannot be opened is
# silently skipped.
sub tar_restore_cleanup {
    my ($storecfg, $statfile) = @_;

    print STDERR "starting cleanup\n";

    if (my $fd = IO::File->new($statfile, "r")) {
        while (defined(my $line = <$fd>)) {
            if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                my $volid = $2;
                eval {
                    if ($volid =~ m|^/|) {
                        # 'or' (not '||') so the check applies to unlink()'s
                        # result instead of being parsed as part of its argument
                        unlink($volid) or die "unlink failed - $!\n";
                    } else {
                        PVE::Storage::vdisk_free($storecfg, $volid);
                    }
                    print STDERR "temporary volume '$volid' sucessfuly removed\n";
                };
                print STDERR "unable to cleanup '$volid' - $@" if $@;
            } else {
                print STDERR "unable to parse line in statfile - $line";
            }
        }
        $fd->close();
    }
}
2020-03-11 07:55:54 +01:00
# Restore a VM from a file based backup archive, dispatching on its format.
#
# $archive: path of the archive, or '-' (handed straight to the VMA restore)
# $vmid:    ID of the VM to restore to
# $user:    user the restore is performed for
# $opts:    restore options; $opts->{format} overrides the detected format
#
# Returns whatever the selected restore function returns.
sub restore_file_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if ($archive eq '-') {
        return restore_vma_archive($archive, $vmid, $user, $opts);
    }

    # try to detect archive format
    my $info = PVE::Storage::archive_info($archive);
    my $comp = $info->{compression};

    my $format = $opts->{format};
    $format = $info->{format} if !defined($format);

    return restore_tar_archive($archive, $vmid, $user, $opts) if $format eq 'tar';

    return restore_vma_archive($archive, $vmid, $user, $opts, $comp);
}
2020-03-11 07:55:54 +01:00
# Helper to remove disks that will not be used after restore.
#
# $storecfg:     storage configuration
# $vmid:         ID of the VM being restored
# $oldconf:      config of the VM that gets overwritten
# $virtdev_hash: devices contained in the backup, keyed by config key
#
# Volumes owned by $vmid whose config key is also part of the backup are
# freed. All other owned volumes are kept, but lose their snapshots, since
# the old config's snapshots are dropped. Errors are only warned about.
my $restore_cleanup_oldconf = sub {
    my ($storecfg, $vmid, $oldconf, $virtdev_hash) = @_;

    my $kept_disks = {};

    my $handle_volume = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        # Note: only delete disk we want to restore
        # other volumes will become unused
        if (!$virtdev_hash->{$ds}) {
            $kept_disks->{$volid} = 1;
        } else {
            eval { PVE::Storage::vdisk_free($storecfg, $volid); };
            warn $@ if $@;
        }
    };
    PVE::QemuConfig->foreach_volume($oldconf, $handle_volume);

    # after the restore we have no snapshots anymore
    for my $snapname (keys $oldconf->{snapshots}->%*) {
        my $vmstate = $oldconf->{snapshots}->{$snapname}->{vmstate};
        if ($vmstate) {
            eval { PVE::Storage::vdisk_free($storecfg, $vmstate); };
            warn $@ if $@;
        }

        for my $volid (keys $kept_disks->%*) {
            eval { PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snapname); };
            warn $@ if $@;
        }
    }
};
2020-03-11 07:55:55 +01:00
# Helper to parse vzdump backup device hints
#
# $rpcenv: Environment, used to check storage permissions
# $user: User ID, to check storage permissions
# $storecfg: Storage configuration
# $fh: the file handle for reading the configuration
# $devinfo: should contain device sizes for all backed-up devices
# $options: backup options (pool, default storage)
#
# Return: $virtdev_hash, updates $devinfo (add devname, virtdev, format, storeid)
my $parse_backup_hints = sub {
    my ($rpcenv, $user, $storecfg, $fh, $devinfo, $options) = @_;

    # assert that an image can be allocated on the given storage
    my $check_storage = sub {
        my ($storeid, $scfg) = @_;

        if (!$scfg->{content}->{images}) {
            die "Content type 'images' is not available on storage '$storeid'\n";
        }

        return if $user eq 'root@pam';
        $rpcenv->check($user, "/storage/$storeid", ['Datastore.AllocateSpace']);
    };

    my $virtdev_hash = {};

    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\#qmdump\#map:(\S+):(\S+):(\S*):(\S*):$/) {
            my ($virtdev, $devname, $storeid, $format) = ($1, $2, $3, $4);

            my $entry = $devinfo->{$devname}
                or die "archive does not contain data for drive '$virtdev'\n";

            # an explicitly requested storage wins over the one from the hint,
            # 'local' is the fallback for both
            $storeid = $options->{storage} if defined($options->{storage});
            $storeid ||= 'local';
            $format ||= 'raw';

            $entry->{devname} = $devname;
            $entry->{virtdev} = $virtdev;
            $entry->{format} = $format;
            $entry->{storeid} = $storeid;

            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $check_storage->($storeid, $scfg); # permission and content type check

            $virtdev_hash->{$virtdev} = $entry;
        } elsif ($line =~ m/^((?:ide|sata|scsi)\d+):\s*(.*)\s*$/) {
            my ($virtdev, $drive_str) = ($1, $2);
            my $drive = parse_drive($virtdev, $drive_str);

            # only cloudinit drives are picked up from plain config lines
            next if !drive_is_cloudinit($drive);

            my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
            $storeid = $options->{storage} if defined($options->{storage});

            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            my $format = qemu_img_format($scfg, $volname); # has 'raw' fallback

            $check_storage->($storeid, $scfg); # permission and content type check

            $virtdev_hash->{$virtdev} = {
                format => $format,
                storeid => $storeid,
                size => PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE,
                is_cloudinit => 1,
            };
        }
    }

    return $virtdev_hash;
};
2020-03-11 07:55:54 +01:00
2020-03-11 07:55:55 +01:00
# Helper to allocate and activate all volumes required for a restore
#
# $storecfg: Storage configuration
# $virtdev_hash: as returned by parse_backup_hints()
# $vmid: ID of the VM the volumes are allocated for
#
# Each entry of $virtdev_hash is updated in place: 'volid' is set to the new
# volume and 'format' is replaced by the storage's default format if the
# requested one is not among the storage's valid formats.
#
# Returns: { $virtdev => $volid }
my $restore_allocate_devices = sub {
    my ($storecfg, $virtdev_hash, $vmid) = @_;

    my $map = {};
    foreach my $virtdev (sort keys %$virtdev_hash) {
        my $d = $virtdev_hash->{$virtdev};
        # round the size up to the next multiple of 1024
        my $alloc_size = int(($d->{size} + 1024 - 1) / 1024);
        my $storeid = $d->{storeid};
        # looked up once per device and reused below
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        # test if requested format is supported
        my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
        my $supported = grep { $_ eq $d->{format} } @$validFormats;
        $d->{format} = $defFormat if !$supported;

        my $name;
        if ($d->{is_cloudinit}) {
            $name = "vm-$vmid-cloudinit";
            # storages with a 'path' get the format appended as file extension
            if ($scfg->{path}) {
                $name .= ".$d->{format}";
            }
        }

        my $volid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $vmid, $d->{format}, $name, $alloc_size);

        print STDERR "new volume ID is '$volid'\n";
        $d->{volid} = $volid;

        PVE::Storage::activate_volumes($storecfg, [$volid]);

        $map->{$virtdev} = $volid;
    }

    return $map;
};
2020-03-11 07:55:54 +01:00
2021-03-18 10:44:49 +01:00
# Translate a single line of a backed-up VM config for the restored VM.
#
# $cookie: state shared between calls; $cookie->{netcount} is the index used
#          for (and incremented by) converted legacy 'vlanN' entries
# $map:    { $virtdev => $volid } of newly allocated volumes
# $line:   raw config line, including its trailing newline
# $unique: if true, MAC addresses and the smbios1 UUID are regenerated
#
# Returns the text to write to the new config; the empty string drops the line.
sub restore_update_config_line {
    my ($cookie, $map, $line, $unique) = @_;

    # backup hints, locks, unused volumes and snapshot parents are not restored
    for my $dropped (
        qr/^\#qmdump\#/,
        qr/^\#vzdump\#/,
        qr/^lock:/,
        qr/^unused\d+:/,
        qr/^parent:/,
    ) {
        return '' if $line =~ $dropped;
    }

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');

    if ($line =~ m/^(vlan(\d+)):\s*(\S+)\s*$/) {
        # try to convert old 1.X settings
        my ($bridge_index, $ethcfg) = ($2, $3);
        my $converted = '';
        for my $devconfig (PVE::Tools::split_list($ethcfg)) {
            my ($model, $macaddr) = split(/\=/, $devconfig);
            if (!$macaddr || $unique) {
                $macaddr = PVE::Tools::random_ether_addr($dc->{mac_prefix});
            }
            my $netstr = print_net({
                model => $model,
                bridge => "vmbr$bridge_index",
                macaddr => $macaddr,
            });
            $converted .= "net$cookie->{netcount}: $netstr\n";
            $cookie->{netcount}++;
        }
        return $converted;
    }

    if (($line =~ m/^(net\d+):\s*(\S+)\s*$/) && $unique) {
        my ($id, $netstr) = ($1, $2);
        my $net = parse_net($netstr);
        if ($net->{macaddr}) {
            $net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix});
        }
        return "$id: " . print_net($net) . "\n";
    }

    if ($line =~ m/^((ide|scsi|virtio|sata|efidisk|tpmstate)\d+):\s*(\S+)\s*$/) {
        my ($virtdev, $value) = ($1, $3);
        my $di = parse_drive($virtdev, $value);

        # drives explicitly excluded from backup are kept as a comment
        return "#$line" if defined($di->{backup}) && !$di->{backup};

        # drives without a newly allocated volume are passed through as-is
        return $line if !$map->{$virtdev};

        delete $di->{format}; # format can change on restore
        $di->{file} = $map->{$virtdev};
        return "$virtdev: " . print_drive($di) . "\n";
    }

    if ($line =~ m/^vmgenid: (.*)/) {
        my $vmgenid = $1;
        # always generate a new vmgenid if there was a valid one setup
        $vmgenid = generate_uuid() if $vmgenid ne '0';
        return "vmgenid: $vmgenid\n";
    }

    if (($line =~ m/^(smbios1: )(.*)/) && $unique) {
        my ($key_prefix, $smbios_str) = ($1, $2);
        my ($uuid, $uuid_str);
        UUID::generate($uuid);
        UUID::unparse($uuid, $uuid_str);
        my $smbios1 = parse_smbios1($smbios_str);
        $smbios1->{uuid} = $uuid_str;
        return $key_prefix . print_smbios1($smbios1) . "\n";
    }

    return '' . $line;
}
2020-03-11 07:55:55 +01:00
# Deactivate all volumes recorded (as 'volid') in the entries of
# $virtdev_hash. Best effort: an error is printed to STDERR, not raised.
my $restore_deactivate_volumes = sub {
    my ($storecfg, $virtdev_hash) = @_;

    # entries without an allocated volume are skipped
    my @volids = grep { $_ } map { $_->{volid} } values %$virtdev_hash;

    eval { PVE::Storage::deactivate_volumes($storecfg, \@volids); };
    print STDERR $@ if $@;
};
# Free all volumes recorded (as 'volid') in the entries of $virtdev_hash.
# Best effort: each volume is handled on its own and failures are only
# reported on STDERR.
my $restore_destroy_volumes = sub {
    my ($storecfg, $virtdev_hash) = @_;

    # entries without an allocated volume are skipped
    my @volids = grep { $_ } map { $_->{volid} } values %$virtdev_hash;

    for my $volid (@volids) {
        eval {
            PVE::Storage::vdisk_free($storecfg, $volid);
            print STDERR "temporary volume '$volid' sucessfuly removed\n";
        };
        if ($@) {
            print STDERR "unable to cleanup '$volid' - $@";
        }
    }
};
2012-12-12 15:35:26 +01:00
2022-04-26 14:30:51 +02:00
# Parse the raw backup config and apply overrides on top of it.
#
# $filename:        passed to parse_vm_config() together with the raw config
# $backup_conf_raw: raw config text from the backup
# $override_conf:   hash of top-level keys that replace the parsed values
#
# Returns the parsed config with all override keys applied.
my $restore_merge_config = sub {
    my ($filename, $backup_conf_raw, $override_conf) = @_;

    my $merged = parse_vm_config($filename, $backup_conf_raw);

    $merged->{$_} = $override_conf->{$_} for keys $override_conf->%*;

    return $merged;
};
2012-12-12 15:35:26 +01:00
# List the 'images' volumes known to the storage layer.
#
# $cfg:  storage configuration
# $vmid: passed to PVE::Storage::vdisk_list() as VM filter (may be undef)
#
# Returns { $volid => $item } where $item is the entry from vdisk_list()
# with an added 'path'. Entries lacking a volid or a non-zero size are skipped.
sub scan_volids {
    my ($cfg, $vmid) = @_;

    my $listing = PVE::Storage::vdisk_list($cfg, undef, $vmid, undef, 'images');

    my $volid_hash = {};
    for my $volumes (values %$listing) {
        for my $volume (@$volumes) {
            my $volid = $volume->{volid};
            next if !($volid && $volume->{size});

            $volume->{path} = PVE::Storage::path($cfg, $volid);
            $volid_hash->{$volid} = $volume;
        }
    }

    return $volid_hash;
}
2019-12-09 14:08:09 +01:00
# Synchronize the disk entries of a VM config with the volumes found on storage.
#
# $vmid:       ID of the VM (used for messages and to skip vmstate volumes)
# $conf:       VM config, modified in place
# $volid_hash: { $volid => { path => ..., size => ... } } of existing volumes
#
# Three passes are made: sizes of configured drives are refreshed, 'unusedX'
# entries pointing to a volume that is already referenced are removed, and
# volumes not referenced at all are added as unused.
#
# Returns 1 if $conf was changed, undef otherwise.
sub update_disk_config {
    my ($vmid, $conf, $volid_hash) = @_;

    my $changes;
    my $prefix = "VM $vmid";

    # Note: it is allowed to define multiple storages with same path (alias), so
    # we need to check both 'volid' and real 'path' (two different volid can point
    # to the same path).
    my %seen_volid; # used and unused disks
    my %seen_path;

    # update size info
    my $refresh_size = sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $volume = $volid_hash->{$volid};

        # mark volid as "in-use" for next step
        $seen_volid{$volid} = 1;
        if ($volume) {
            my $path = $volume->{path};
            $seen_path{$path} = 1 if $path;
        }

        return if drive_is_cdrom($drive);
        return if !$volume;

        my ($updated, $msg) = PVE::QemuServer::Drive::update_disksize($drive, $volume->{size});
        return if !defined($updated);

        $changes = 1;
        $conf->{$opt} = print_drive($updated);
        print "$prefix ($opt): $msg\n";
    };
    PVE::QemuConfig->foreach_volume($conf, $refresh_size);

    # remove 'unusedX' entry if volume is used
    my $drop_duplicate_unused = sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $known = $volid_hash->{$volid};
        my $path = $known ? $known->{path} : undef;

        my $in_use = $seen_volid{$volid} || ($path && $seen_path{$path});
        if ($in_use) {
            print "$prefix remove entry '$opt', its volume '$volid' is in use\n";
            $changes = 1;
            delete $conf->{$opt};
        }

        $seen_volid{$volid} = 1;
        $seen_path{$path} = 1 if $path;
    };
    PVE::QemuConfig->foreach_unused_volume($conf, $drop_duplicate_unused);

    # add every volume nobody references yet as unused; vmstate volumes are ignored
    for my $volid (sort keys %$volid_hash) {
        next if $volid =~ m/vm-$vmid-state-/;
        next if $seen_volid{$volid};

        my $path = $volid_hash->{$volid}->{path};
        next if !$path; # just to be sure
        next if $seen_path{$path};

        $changes = 1;
        my $key = PVE::QemuConfig->add_unused_volume($conf, $volid);
        print "$prefix add unreferenced volume '$volid' as '$key' to config\n";
        $seen_path{$path} = 1; # avoid to add more than once (aliases)
    }

    return $changes;
}
# Rescan storage volumes and update the disk entries of one or all VM configs.
#
# $vmid:   VM to update; if undef, every VM returned by config_list() is updated
# $nolock: if true, the configs are updated without taking the config lock
# $dryrun: if true, changes are computed and printed but not written
sub rescan {
    my ($vmid, $nolock, $dryrun) = @_;

    my $cfg = PVE::Storage::config();

    print "rescan volumes...\n";
    my $volid_hash = scan_volids($cfg, $vmid);

    # update a single VM config with the volumes that belong to that VM
    my $update_one = sub {
        my ($id) = @_;

        my $conf = PVE::QemuConfig->load_config($id);
        PVE::QemuConfig->check_lock($conf);

        my $own_volumes = {};
        for my $volid (keys %$volid_hash) {
            my $info = $volid_hash->{$volid};
            next if !($info->{vmid} && $info->{vmid} == $id);
            $own_volumes->{$volid} = $info;
        }

        my $changed = update_disk_config($id, $conf, $own_volumes);
        return if !$changed || $dryrun;

        PVE::QemuConfig->write_config($id, $conf);
    };

    my @targets = defined($vmid) ? ($vmid) : keys %{ config_list() };

    for my $id (@targets) {
        if ($nolock) {
            $update_one->($id);
        } else {
            PVE::QemuConfig->lock_config($id, $update_one, $id);
        }
    }
}
2020-03-11 07:55:55 +01:00
sub restore_proxmox_backup_archive {
my ( $ archive , $ vmid , $ user , $ options ) = @ _ ;
my $ storecfg = PVE::Storage:: config ( ) ;
my ( $ storeid , $ volname ) = PVE::Storage:: parse_volume_id ( $ archive ) ;
my $ scfg = PVE::Storage:: storage_config ( $ storecfg , $ storeid ) ;
my $ fingerprint = $ scfg - > { fingerprint } ;
2020-07-20 10:26:21 +02:00
my $ keyfile = PVE::Storage::PBSPlugin:: pbs_encryption_key_file_name ( $ storecfg , $ storeid ) ;
2020-03-11 07:55:55 +01:00
2020-12-03 12:43:40 +01:00
my $ repo = PVE::PBSClient:: get_repository ( $ scfg ) ;
2022-06-22 14:57:35 +02:00
my $ namespace = $ scfg - > { namespace } ;
2020-07-10 11:44:53 +02:00
2021-03-03 10:56:09 +01:00
# This is only used for `pbs-restore` and the QEMU PBS driver (live-restore)
2020-03-11 07:55:55 +01:00
my $ password = PVE::Storage::PBSPlugin:: pbs_get_password ( $ scfg , $ storeid ) ;
local $ ENV { PBS_PASSWORD } = $ password ;
local $ ENV { PBS_FINGERPRINT } = $ fingerprint if defined ( $ fingerprint ) ;
my ( $ vtype , $ pbs_backup_name , undef , undef , undef , undef , $ format ) =
PVE::Storage:: parse_volname ( $ storecfg , $ archive ) ;
die "got unexpected vtype '$vtype'\n" if $ vtype ne 'backup' ;
die "got unexpected backup format '$format'\n" if $ format ne 'pbs-vm' ;
my $ tmpdir = "/var/tmp/vzdumptmp$$" ;
rmtree $ tmpdir ;
mkpath $ tmpdir ;
my $ conffile = PVE::QemuConfig - > config_file ( $ vmid ) ;
# disable interrupts (always do cleanups)
local $ SIG { INT } =
local $ SIG { TERM } =
local $ SIG { QUIT } =
local $ SIG { HUP } = sub { print STDERR "got interrupt - ignored\n" ; } ;
# Note: $oldconf is undef if VM does not exists
my $ cfs_path = PVE::QemuConfig - > cfs_config_path ( $ vmid ) ;
my $ oldconf = PVE::Cluster:: cfs_read_file ( $ cfs_path ) ;
2021-03-08 13:26:57 +01:00
my $ new_conf_raw = '' ;
2020-03-11 07:55:55 +01:00
my $ rpcenv = PVE::RPCEnvironment:: get ( ) ;
2022-04-21 13:26:52 +02:00
my $ devinfo = { } ; # info about drives included in backup
my $ virtdev_hash = { } ; # info about allocated drives
2020-03-11 07:55:55 +01:00
eval {
# enable interrupts
local $ SIG { INT } =
local $ SIG { TERM } =
local $ SIG { QUIT } =
local $ SIG { HUP } =
local $ SIG { PIPE } = sub { die "interrupted by signal\n" ; } ;
my $ cfgfn = "$tmpdir/qemu-server.conf" ;
my $ firewall_config_fn = "$tmpdir/fw.conf" ;
my $ index_fn = "$tmpdir/index.json" ;
my $ cmd = "restore" ;
my $ param = [ $ pbs_backup_name , "index.json" , $ index_fn ] ;
PVE::Storage::PBSPlugin:: run_raw_client_cmd ( $ scfg , $ storeid , $ cmd , $ param ) ;
my $ index = PVE::Tools:: file_get_contents ( $ index_fn ) ;
$ index = decode_json ( $ index ) ;
foreach my $ info ( @ { $ index - > { files } } ) {
if ( $ info - > { filename } =~ m/^(drive-\S+).img.fidx$/ ) {
my $ devname = $ 1 ;
if ( $ info - > { size } =~ m/^(\d+)$/ ) { # untaint size
$ devinfo - > { $ devname } - > { size } = $ 1 ;
} else {
die "unable to parse file size in 'index.json' - got '$info->{size}'\n" ;
}
}
}
2020-09-02 14:07:02 +02:00
my $ is_qemu_server_backup = scalar (
grep { $ _ - > { filename } eq 'qemu-server.conf.blob' } @ { $ index - > { files } }
) ;
2020-03-11 07:55:55 +01:00
if ( ! $ is_qemu_server_backup ) {
die "backup does not look like a qemu-server backup (missing 'qemu-server.conf' file)\n" ;
}
my $ has_firewall_config = scalar ( grep { $ _ - > { filename } eq 'fw.conf.blob' } @ { $ index - > { files } } ) ;
$ param = [ $ pbs_backup_name , "qemu-server.conf" , $ cfgfn ] ;
PVE::Storage::PBSPlugin:: run_raw_client_cmd ( $ scfg , $ storeid , $ cmd , $ param ) ;
if ( $ has_firewall_config ) {
$ param = [ $ pbs_backup_name , "fw.conf" , $ firewall_config_fn ] ;
PVE::Storage::PBSPlugin:: run_raw_client_cmd ( $ scfg , $ storeid , $ cmd , $ param ) ;
my $ pve_firewall_dir = '/etc/pve/firewall' ;
mkdir $ pve_firewall_dir ; # make sure the dir exists
PVE::Tools:: file_copy ( $ firewall_config_fn , "${pve_firewall_dir}/$vmid.fw" ) ;
}
my $ fh = IO::File - > new ( $ cfgfn , "r" ) ||
2020-08-04 13:32:44 +02:00
die "unable to read qemu-server.conf - $!\n" ;
2020-03-11 07:55:55 +01:00
2022-04-21 13:26:52 +02:00
$ virtdev_hash = $ parse_backup_hints - > ( $ rpcenv , $ user , $ storecfg , $ fh , $ devinfo , $ options ) ;
2020-03-11 07:55:55 +01:00
# fixme: rate limit?
# create empty/temp config
PVE::Tools:: file_set_contents ( $ conffile , "memory: 128\nlock: create" ) ;
$ restore_cleanup_oldconf - > ( $ storecfg , $ vmid , $ oldconf , $ virtdev_hash ) if $ oldconf ;
# allocate volumes
my $ map = $ restore_allocate_devices - > ( $ storecfg , $ virtdev_hash , $ vmid ) ;
2021-07-01 11:37:29 +02:00
foreach my $ virtdev ( sort keys %$ virtdev_hash ) {
my $ d = $ virtdev_hash - > { $ virtdev } ;
next if $ d - > { is_cloudinit } ; # no need to restore cloudinit
2020-03-11 07:55:55 +01:00
2021-07-08 13:25:33 +02:00
# this fails if storage is unavailable
2021-07-01 11:37:29 +02:00
my $ volid = $ d - > { volid } ;
my $ path = PVE::Storage:: path ( $ storecfg , $ volid ) ;
2020-03-11 07:55:55 +01:00
fix #3075: add TPM v1.2 and v2.0 support via swtpm
Starts an instance of swtpm per VM in it's systemd scope, it will
terminate by itself if the VM exits, or be terminated manually if
startup fails.
Before first use, a TPM state is created via swtpm_setup. State is
stored in a 'tpmstate0' volume, treated much the same way as an efidisk.
It is migrated 'offline', the important part here is the creation of the
target volume, the actual data transfer happens via the QEMU device
state migration process.
Move-disk can only work offline, as the disk is not registered with
QEMU, so 'drive-mirror' wouldn't work. swtpm itself has no method of
moving a backing storage at runtime.
For backups, a bit of a workaround is necessary (this may later be
replaced by NBD support in swtpm): During the backup, we attach the
backing file of the TPM as a read-only drive to QEMU, so our backup
code can detect it as a block device and back it up as such, while
ensuring consistency with the rest of disk state ("snapshot" semantic).
The name for the ephemeral drive is specifically chosen as
'drive-tpmstate0-backup', diverging from our usual naming scheme with
the '-backup' suffix, to avoid it ever being treated as a regular drive
from the rest of the stack in case it gets left over after a backup for
some reason (shouldn't happen).
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
2021-10-04 17:29:20 +02:00
# for live-restore we only want to preload the efidisk and TPM state
next if $ options - > { live } && $ virtdev ne 'efidisk0' && $ virtdev ne 'tpmstate0' ;
2021-07-08 13:25:33 +02:00
2022-05-12 10:42:32 +02:00
my @ ns_arg ;
if ( defined ( my $ ns = $ scfg - > { namespace } ) ) {
@ ns_arg = ( '--ns' , $ ns ) ;
}
2021-07-01 11:37:29 +02:00
my $ pbs_restore_cmd = [
'/usr/bin/pbs-restore' ,
'--repository' , $ repo ,
2022-05-12 10:42:32 +02:00
@ ns_arg ,
2021-07-01 11:37:29 +02:00
$ pbs_backup_name ,
"$d->{devname}.img.fidx" ,
$ path ,
'--verbose' ,
] ;
2020-06-04 10:52:42 +02:00
2021-07-01 11:37:29 +02:00
push @$ pbs_restore_cmd , '--format' , $ d - > { format } if $ d - > { format } ;
push @$ pbs_restore_cmd , '--keyfile' , $ keyfile if - e $ keyfile ;
2020-03-11 07:55:55 +01:00
2021-07-01 11:37:29 +02:00
if ( PVE::Storage:: volume_has_feature ( $ storecfg , 'sparseinit' , $ volid ) ) {
push @$ pbs_restore_cmd , '--skip-zero' ;
2021-03-03 10:56:09 +01:00
}
2021-07-01 11:37:29 +02:00
my $ dbg_cmdstring = PVE::Tools:: cmd2string ( $ pbs_restore_cmd ) ;
print "restore proxmox backup image: $dbg_cmdstring\n" ;
run_command ( $ pbs_restore_cmd ) ;
2020-03-11 07:55:55 +01:00
}
$ fh - > seek ( 0 , 0 ) || die "seek failed - $!\n" ;
my $ cookie = { netcount = > 0 } ;
while ( defined ( my $ line = <$fh> ) ) {
2021-03-18 10:44:49 +01:00
$ new_conf_raw . = restore_update_config_line (
2021-03-08 13:26:57 +01:00
$ cookie ,
$ map ,
$ line ,
$ options - > { unique } ,
) ;
2020-03-11 07:55:55 +01:00
}
$ fh - > close ( ) ;
} ;
my $ err = $@ ;
2021-03-03 10:56:09 +01:00
if ( $ err || ! $ options - > { live } ) {
2022-04-21 13:26:52 +02:00
$ restore_deactivate_volumes - > ( $ storecfg , $ virtdev_hash ) ;
2021-03-03 10:56:09 +01:00
}
2020-03-11 07:55:55 +01:00
rmtree $ tmpdir ;
if ( $ err ) {
2022-04-21 13:26:52 +02:00
$ restore_destroy_volumes - > ( $ storecfg , $ virtdev_hash ) ;
2020-03-11 07:55:55 +01:00
die $ err ;
}
2021-04-21 16:25:25 +02:00
if ( $ options - > { live } ) {
# keep lock during live-restore
$ new_conf_raw . = "\nlock: create" ;
}
2022-04-26 14:30:51 +02:00
my $ new_conf = $ restore_merge_config - > ( $ conffile , $ new_conf_raw , $ options - > { override_conf } ) ;
PVE::QemuConfig - > write_config ( $ vmid , $ new_conf ) ;
2020-03-11 07:55:55 +01:00
eval { rescan ( $ vmid , 1 ) ; } ;
warn $@ if $@ ;
2021-03-03 10:56:09 +01:00
PVE::AccessControl:: add_vm_to_pool ( $ vmid , $ options - > { pool } ) if $ options - > { pool } ;
if ( $ options - > { live } ) {
2021-04-21 16:25:24 +02:00
# enable interrupts
local $ SIG { INT } =
local $ SIG { TERM } =
local $ SIG { QUIT } =
local $ SIG { HUP } =
local $ SIG { PIPE } = sub { die "got signal ($!) - abort\n" ; } ;
2021-03-03 10:56:09 +01:00
2021-04-21 16:25:24 +02:00
my $ conf = PVE::QemuConfig - > load_config ( $ vmid ) ;
die "cannot do live-restore for template\n" if PVE::QemuConfig - > is_template ( $ conf ) ;
2021-03-03 10:56:09 +01:00
fix #3075: add TPM v1.2 and v2.0 support via swtpm
Starts an instance of swtpm per VM in its systemd scope; it will
terminate by itself if the VM exits, or be terminated manually if
startup fails.
Before first use, a TPM state is created via swtpm_setup. State is
stored in a 'tpmstate0' volume, treated much the same way as an efidisk.
It is migrated 'offline', the important part here is the creation of the
target volume, the actual data transfer happens via the QEMU device
state migration process.
Move-disk can only work offline, as the disk is not registered with
QEMU, so 'drive-mirror' wouldn't work. swtpm itself has no method of
moving a backing storage at runtime.
For backups, a bit of a workaround is necessary (this may later be
replaced by NBD support in swtpm): During the backup, we attach the
backing file of the TPM as a read-only drive to QEMU, so our backup
code can detect it as a block device and back it up as such, while
ensuring consistency with the rest of disk state ("snapshot" semantic).
The name for the ephemeral drive is specifically chosen as
'drive-tpmstate0-backup', diverging from our usual naming scheme with
the '-backup' suffix, to avoid it ever being treated as a regular drive
from the rest of the stack in case it gets left over after a backup for
some reason (shouldn't happen).
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
2021-10-04 17:29:20 +02:00
# these special drives are already restored before start
delete $ devinfo - > { 'drive-efidisk0' } ;
delete $ devinfo - > { 'drive-tpmstate0-backup' } ;
2022-06-22 14:57:35 +02:00
my $ pbs_opts = {
repo = > $ repo ,
keyfile = > $ keyfile ,
snapshot = > $ pbs_backup_name ,
namespace = > $ namespace ,
} ;
pbs_live_restore ( $ vmid , $ conf , $ storecfg , $ devinfo , $ pbs_opts ) ;
2021-04-21 16:25:25 +02:00
PVE::QemuConfig - > remove_lock ( $ vmid , "create" ) ;
2021-03-03 10:56:09 +01:00
}
}
# Start the VM paused with its disks backed by a PBS snapshot, then stream
# every restored disk onto its target volume while the guest runs.
#
# Parameters:
#   $vmid           - ID of the VM to start and restore into
#   $conf           - VM config hash ref; drive entries are looked up by name
#   $storecfg       - storage config, passed on to vm_start_nolock/_do_vm_stop
#   $restored_disks - hash ref keyed by 'drive-<confname>', one key per disk
#   $opts           - hash ref with 'repo', 'snapshot', 'keyfile' and an
#                     optional 'namespace'
#
# Dies if a key of $restored_disks does not have the 'drive-<confname>' form.
# If starting or streaming fails, the error is printed as a warning, the VM is
# stopped via _do_vm_stop and the sub dies with "live-restore failed".
sub pbs_live_restore {
    my ($vmid, $conf, $storecfg, $restored_disks, $opts) = @_;

    print "starting VM for live-restore\n";
    print "repository: '$opts->{repo}', snapshot: '$opts->{snapshot}'\n";

    my $pbs_backing = {};
    for my $ds (keys %$restored_disks) {
        # never continue with a stale or undefined capture if the key is malformed
        my ($confname) = $ds =~ m/^drive-(.*)$/
            or die "unexpected drive name '$ds' for live-restore\n";

        $pbs_backing->{$confname} = {
            repository => $opts->{repo},
            snapshot => $opts->{snapshot},
            archive => "$ds.img.fidx",
        };
        $pbs_backing->{$confname}->{keyfile} = $opts->{keyfile} if -e $opts->{keyfile};
        $pbs_backing->{$confname}->{namespace} = $opts->{namespace}
            if defined($opts->{namespace});

        my $drive = parse_drive($confname, $conf->{$confname});
        print "restoring '$ds' to '$drive->{file}'\n";
    }

    my $drives_streamed = 0;
    eval {
        # make sure HA doesn't interrupt our restore by stopping the VM
        if (PVE::HA::Config::vm_is_ha_managed($vmid)) {
            run_command(['ha-manager', 'set', "vm:$vmid", '--state', 'started']);
        }

        # start VM with backing chain pointing to PBS backup, environment vars for PBS driver
        # in QEMU (PBS_PASSWORD and PBS_FINGERPRINT) are already set by our caller
        vm_start_nolock($storecfg, $vmid, $conf, {paused => 1, 'pbs-backing' => $pbs_backing}, {});

        my $qmeventd_fd = register_qmeventd_handle($vmid);

        # begin streaming, i.e. data copy from PBS to target disk for every vol,
        # this will effectively collapse the backing image chain consisting of
        # [target <- alloc-track -> PBS snapshot] to just [target] (alloc-track
        # removes itself once all backing images vanish with 'auto-remove=on')
        my $jobs = {};
        for my $ds (sort keys %$restored_disks) {
            my $job_id = "restore-$ds";
            mon_cmd($vmid, 'block-stream',
                'job-id' => $job_id,
                device => "$ds",
            );
            $jobs->{$job_id} = {};
        }

        # the guest was started paused; resume it now that all stream jobs exist
        mon_cmd($vmid, 'cont');
        qemu_drive_mirror_monitor($vmid, undef, $jobs, 'auto', 0, 'stream');

        print "restore-drive jobs finished successfully, removing all tracking block devices"
            ." to disconnect from Proxmox Backup Server\n";

        for my $ds (sort keys %$restored_disks) {
            mon_cmd($vmid, 'blockdev-del', 'node-name' => "$ds-pbs");
        }

        close($qmeventd_fd);
    };

    my $err = $@;

    if ($err) {
        warn "An error occurred during live-restore: $err\n";
        _do_vm_stop($storecfg, $vmid, 1, 1, 10, 0, 1);
        die "live-restore failed\n";
    }
}
2012-12-12 15:35:26 +01:00
# Restore a VM from a VMA archive by piping it (optionally through cstream for
# rate limiting and through a decompressor) into 'vma extract'.
#
# Parameters:
#   $archive - path of the archive, or '-' to read it from STDIN
#   $vmid    - ID of the VM to restore into
#   $user    - user name, passed on to the backup-hint parser
#   $opts    - hash ref; the keys read here are 'bwlimit', 'unique',
#              'override_conf' and 'pool'
#   $comp    - compression identifier handed to
#              PVE::Storage::decompressor_info, false for none
#
# 'vma extract' reads the device mapping from a fifo. The mapping is written
# once the extracted config is available, i.e. when the "CTIME:" line shows up
# in the command output. On any error the allocated volumes are destroyed and
# the error is re-thrown.
sub restore_vma_archive {
    my ($archive, $vmid, $user, $opts, $comp) = @_;

    my $readfrom = $archive;

    my $cfg = PVE::Storage::config();
    my $commands = [];
    my $bwlimit = $opts->{bwlimit};

    my $dbg_cmdstring = '';
    # append a command to the pipeline; later stages then read from stdin
    my $add_pipe = sub {
        my ($cmd) = @_;
        push @$commands, $cmd;
        $dbg_cmdstring .= ' | ' if length($dbg_cmdstring);
        $dbg_cmdstring .= PVE::Tools::cmd2string($cmd);
        $readfrom = '-';
    };

    my $input = undef;
    if ($archive eq '-') {
        $input = '<&STDIN';
    } else {
        # If we use a backup from a PVE defined storage we also consider that
        # storage's rate limit:
        my (undef, $volid) = PVE::Storage::path_to_volume_id($cfg, $archive);
        if (defined($volid)) {
            my ($sid, undef) = PVE::Storage::parse_volume_id($volid);
            my $readlimit = PVE::Storage::get_bandwidth_limit('restore', [$sid], $bwlimit);
            if ($readlimit) {
                print STDERR "applying read rate limit: $readlimit\n";
                my $cstream = ['cstream', '-t', $readlimit * 1024, '--', $readfrom];
                $add_pipe->($cstream);
            }
        }
    }

    if ($comp) {
        my $info = PVE::Storage::decompressor_info('vma', $comp);
        my $cmd = $info->{decompressor};
        push @$cmd, $readfrom;
        $add_pipe->($cmd);
    }

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { warn "got interrupt - ignored\n"; };

    my $mapfifo = "/var/tmp/vzdumptmp$$.fifo";
    # remove a stale leftover (same as done for $tmpdir above) and fail early
    # with a clear message if the fifo cannot be created
    unlink $mapfifo;
    POSIX::mkfifo($mapfifo, 0600)
        or die "unable to create fifo '$mapfifo' - $!\n";

    my $fifofh;
    # opened via run_command's 'afterfork' hook
    my $openfifo = sub { open($fifofh, '>', $mapfifo) or die $! };

    $add_pipe->(['vma', 'extract', '-v', '-r', $mapfifo, $readfrom, $tmpdir]);

    my $oldtimeout;
    my $timeout = 5;

    my $devinfo = {}; # info about drives included in backup
    my $virtdev_hash = {}; # info about allocated drives

    my $rpcenv = PVE::RPCEnvironment::get();

    my $conffile = PVE::QemuConfig->config_file($vmid);

    # Note: $oldconf is undef if VM does not exist
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my %storage_limits;

    # Parse the extracted config, allocate the target volumes, write the
    # device map to the fifo and collect the updated config lines.
    my $print_devmap = sub {
        my $cfgfn = "$tmpdir/qemu-server.conf";

        # we can read the config - that is already extracted
        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        my $fwcfgfn = "$tmpdir/qemu-server.fw";
        if (-f $fwcfgfn) {
            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($fwcfgfn, "${pve_firewall_dir}/$vmid.fw");
        }

        $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $cfg, $fh, $devinfo, $opts);

        # look up the restore bandwidth limit once per target storage
        foreach my $info (values %{$virtdev_hash}) {
            my $storeid = $info->{storeid};
            next if defined($storage_limits{$storeid});

            my $limit = PVE::Storage::get_bandwidth_limit('restore', [$storeid], $bwlimit) // 0;
            print STDERR "rate limit for storage $storeid: $limit KiB/s\n" if $limit;
            $storage_limits{$storeid} = $limit * 1024;
        }

        foreach my $devname (keys %$devinfo) {
            die "found no device mapping information for device '$devname'\n"
                if !$devinfo->{$devname}->{virtdev};
        }

        # create empty/temp config
        if ($oldconf) {
            PVE::Tools::file_set_contents($conffile, "memory: 128\n");
            $restore_cleanup_oldconf->($cfg, $vmid, $oldconf, $virtdev_hash);
        }

        # allocate volumes
        my $map = $restore_allocate_devices->($cfg, $virtdev_hash, $vmid);

        # print restore information to $fifofh
        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            my $storeid = $d->{storeid};
            my $volid = $d->{volid};

            my $map_opts = '';
            if (my $limit = $storage_limits{$storeid}) {
                $map_opts .= "throttling.bps=$limit:throttling.group=$storeid:";
            }

            my $write_zeros = 1;
            if (PVE::Storage::volume_has_feature($cfg, 'sparseinit', $volid)) {
                $write_zeros = 0;
            }

            my $path = PVE::Storage::path($cfg, $volid);

            print $fifofh "${map_opts}format=$d->{format}:${write_zeros}:$d->{devname}=$path\n";

            print "map '$d->{devname}' to '$path' (write zeros = ${write_zeros})\n";
        }

        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $fh->close();
    };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };
        local $SIG{ALRM} = sub { die "got timeout\n"; };

        $oldtimeout = alarm($timeout);

        my $parser = sub {
            my $line = shift;

            print "$line\n";

            if ($line =~ m/^DEV:\sdev_id=(\d+)\ssize:\s(\d+)\sdevname:\s(\S+)$/) {
                my ($dev_id, $size, $devname) = ($1, $2, $3);
                $devinfo->{$devname} = { size => $size, dev_id => $dev_id };
            } elsif ($line =~ m/^CTIME: /) {
                # we correctly received the vma config, so we can disable
                # the timeout now for disk allocation (set to 10 minutes, so
                # that we always timeout if something goes wrong)
                alarm(600);
                $print_devmap->();
                print $fifofh "done\n";
                # restore the alarm that was pending before we installed ours
                my $tmp = $oldtimeout || 0;
                $oldtimeout = undef;
                alarm($tmp);
                close($fifofh);
                $fifofh = undef;
            }
        };

        print "restore vma archive: $dbg_cmdstring\n";
        run_command($commands, input => $input, outfunc => $parser, afterfork => $openfifo);
    };
    my $err = $@;

    alarm($oldtimeout) if $oldtimeout;

    $restore_deactivate_volumes->($cfg, $virtdev_hash);

    close($fifofh) if $fifofh;
    unlink $mapfifo;
    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($cfg, $virtdev_hash);
        die $err;
    }

    my $new_conf = $restore_merge_config->($conffile, $new_conf_raw, $opts->{override_conf});
    PVE::QemuConfig->write_config($vmid, $new_conf);

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $opts->{pool}) if $opts->{pool};
}
# Restore a VM from a legacy tar archive by piping it through
# 'tar --to-command=qmextract'.
#
# Parameters:
#   $archive - path of the archive, or '-' to read it from STDIN
#   $vmid    - ID of the VM to restore into
#   $user    - user name, exported to qmextract as VZDUMP_USER
#   $opts    - hash ref; the keys read here are 'override_conf', 'storage',
#              'pool', 'prealloc', 'info' and 'unique'
#
# Dies if any override options are given (not supported for tar archives), if
# the archive does not start with 'qemu-server.conf', or if extraction fails.
# With $opts->{info} set, nothing is written to the VM config.
sub restore_tar_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    # avoid autovivifying $opts->{override_conf} just to count its keys
    my $override_conf = $opts->{override_conf} // {};
    if (scalar(keys %$override_conf) > 0) {
        my $keystring = join(' ', keys %$override_conf);
        die "cannot pass along options ($keystring) when restoring from tar archive\n";
    }

    if ($archive ne '-') {
        my $firstfile = tar_archive_read_firstfile($archive);
        die "ERROR: file '$archive' does not look like a QemuServer vzdump backup\n"
            if $firstfile ne 'qemu-server.conf';
    }

    my $storecfg = PVE::Storage::config();

    # avoid zombie disks when restoring over an existing VM -> cleanup first
    # pass keep_empty_config=1 to keep the config (thus VMID) reserved for us
    # skiplock=1 because qmrestore has set the 'create' lock itself already
    my $vmcfgfn = PVE::QemuConfig->config_file($vmid);
    destroy_vm($storecfg, $vmid, 1, { lock => 'restore' }) if -f $vmcfgfn;

    my $tocmd = "/usr/lib/qemu-server/qmextract";

    $tocmd .= " --storage " . PVE::Tools::shellquote($opts->{storage}) if $opts->{storage};
    $tocmd .= " --pool " . PVE::Tools::shellquote($opts->{pool}) if $opts->{pool};
    $tocmd .= ' --prealloc' if $opts->{prealloc};
    $tocmd .= ' --info' if $opts->{info};

    # tar option "xf" does not autodetect compression when read from STDIN,
    # so we pipe to zcat
    my $cmd = "zcat -f|tar xf " . PVE::Tools::shellquote($archive) . " " .
        PVE::Tools::shellquote("--to-command=$tocmd");

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    mkpath $tmpdir;

    local $ENV{VZDUMP_TMPDIR} = $tmpdir;
    local $ENV{VZDUMP_VMID} = $vmid;
    local $ENV{VZDUMP_USER} = $user;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    my $new_conf_raw = '';

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        if ($archive eq '-') {
            print "extracting archive from STDIN\n";
            run_command($cmd, input => "<&STDIN");
        } else {
            print "extracting archive '$archive'\n";
            run_command($cmd);
        }

        # NOTE: this only leaves the eval block, see the check after it
        return if $opts->{info};

        # read new mapping
        my $map = {};
        my $statfile = "$tmpdir/qmrestore.stat";
        if (my $fd = IO::File->new($statfile, "r")) {
            while (defined(my $line = <$fd>)) {
                if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                    $map->{$1} = $2 if $1;
                } else {
                    print STDERR "unable to parse line in statfile - $line\n";
                }
            }
            $fd->close();
        }

        my $confsrc = "$tmpdir/qemu-server.conf";

        my $srcfd = IO::File->new($confsrc, "r") || die "unable to open file '$confsrc'\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$srcfd>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $srcfd->close();
    };
    if (my $err = $@) {
        tar_restore_cleanup($storecfg, "$tmpdir/qmrestore.stat") if !$opts->{info};
        # the stat file has been consumed above, do not leave the tmpdir behind
        rmtree $tmpdir;
        die $err;
    }

    rmtree $tmpdir;

    # in info mode no config was assembled, so do not overwrite the VM config
    # with an empty file
    return if $opts->{info};

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;
}
2017-05-15 14:11:58 +02:00
# Call $func->($storage_id) once for every storage referenced by a non-CD-ROM
# volume in $conf. Storages are visited in sorted order; volumes whose ID
# cannot be parsed into a storage ID are ignored.
sub foreach_storage_used_by_vm {
    my ($conf, $func) = @_;

    my %used_storage;

    my $collect_storage = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);

        my ($sid, $volname) = PVE::Storage::parse_volume_id($drive->{file}, 1);
        $used_storage{$sid} = $sid if $sid;
    };
    PVE::QemuConfig->foreach_volume($conf, $collect_storage);

    for my $sid (sort keys %used_storage) {
        $func->($sid);
    }
}
2019-11-20 19:06:15 +01:00
# storage types checked by do_snapshots_with_qemu (unless 'krbd' is set)
my $qemu_snap_storage = {
    rbd => 1,
};

# Returns 1 if the volume is on a storage type listed in $qemu_snap_storage
# without 'krbd' set, or if the volume ID ends in .qcow2/.qed. Returns
# nothing otherwise, and always nothing for a device ID containing 'tpmstate0'.
# Dies if the storage of $volid is not present in $storecfg.
sub do_snapshots_with_qemu {
    my ($storecfg, $volid, $deviceid) = @_;

    return if $deviceid =~ m/tpmstate0/;

    my $storage_name = PVE::Storage::parse_volume_id($volid);
    my $scfg = $storecfg->{ids}->{$storage_name};
    if (!defined($scfg)) {
        die "could not find storage '$storage_name'\n";
    }

    my $storage_matches = $qemu_snap_storage->{$scfg->{type}} && !$scfg->{krbd};
    return 1 if $storage_matches;

    return 1 if $volid =~ m/\.(qcow2|qed)$/;

    return;
}
2016-02-15 11:45:56 +01:00
# Ping the guest agent of VM $vmid with a 3 second timeout.
# Returns 1 if the 'guest-ping' command succeeds and 0 otherwise. On failure
# a warning is printed unless $nowarn is set.
sub qga_check_running {
    my ($vmid, $nowarn) = @_;

    eval { mon_cmd($vmid, "guest-ping", timeout => 3); };
    my $ping_err = $@;

    return 1 if !$ping_err;

    warn "QEMU Guest Agent is not running - $ping_err" unless $nowarn;
    return 0;
}
2013-02-14 11:58:49 +01:00
# Convert the volumes of a VM into base volumes.
#
# Every non-CD-ROM volume of $conf (or only the one named $disk, if given)
# whose storage reports the 'template' feature is replaced by the result of
# vdisk_create_base. The config is written after each converted volume.
sub template_create {
    my ($vmid, $conf, $disk) = @_;

    my $storecfg = PVE::Storage::config();

    my $convert_volume = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);
        return if $disk && $ds ne $disk;

        my $volid = $drive->{file};
        return if !PVE::Storage::volume_has_feature($storecfg, 'template', $volid);

        my $base_volid = PVE::Storage::vdisk_create_base($storecfg, $volid);
        $drive->{file} = $base_volid;
        $conf->{$ds} = print_drive($drive);

        # persist right away so an already converted volume is recorded
        PVE::QemuConfig->write_config($vmid, $conf);
    };

    PVE::QemuConfig->foreach_volume($conf, $convert_volume);
}
2019-03-07 13:43:11 +01:00
# Translate an 'iscsi://<portal>/<target>/<lun>' path into a comma separated
# list of 'file.*' driver options. Dies if $path does not have that form.
sub convert_iscsi_path {
    my ($path) = @_;

    my ($portal, $target, $lun) = $path =~ m|^iscsi://([^/]+)/([^/]+)/(.+)$|
        or die "cannot convert iscsi path '$path', unkown format\n";

    my $initiator_name = get_initiator_name();

    return join(',',
        "file.driver=iscsi",
        "file.transport=tcp",
        "file.initiator-name=$initiator_name",
        "file.portal=$portal",
        "file.target=$target",
        "file.lun=$lun",
        "driver=raw",
    );
}
2013-04-29 08:41:01 +02:00
# Copy a disk image with 'qemu-img convert' into an already existing
# destination volume.
#
# Parameters:
#   $src_volid           - source volume ID, or a path to a plain file or
#                          block device
#   $dst_volid           - destination volume ID (must be a valid volume ID)
#   $size                - total size, only used for the progress output
#   $snapname            - optional snapshot name of the source
#   $is_zero_initialized - if true and the target is not iSCSI, the target is
#                          opened through the 'zeroinit:' prefix
#
# Dies if source or destination cannot be resolved, or with
# "copy failed: ..." if the qemu-img command fails.
sub qemu_img_convert {
    my ($src_volid, $dst_volid, $size, $snapname, $is_zero_initialized) = @_;

    my $storecfg = PVE::Storage::config();
    my ($src_storeid, $src_volname) = PVE::Storage::parse_volume_id($src_volid, 1);
    my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid, 1);

    if (!$dst_storeid) {
        die "destination '$dst_volid' is not a valid volid form qemu-img convert\n";
    }

    my ($cachemode, $src_path, $src_format);
    my $src_is_iscsi = 0;

    if ($src_storeid) {
        # source is a storage-managed volume
        PVE::Storage::activate_volumes($storecfg, [$src_volid], $snapname);
        my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
        $src_format = qemu_img_format($src_scfg, $src_volname);
        $src_path = PVE::Storage::path($storecfg, $src_volid, $snapname);
        $src_is_iscsi = ($src_path =~ m|^iscsi://|);
        $cachemode = 'none' if $src_scfg->{type} eq 'zfspool';
    } elsif (-f $src_volid || -b $src_volid) {
        # source is a plain file or block device, guess format from extension
        $src_path = $src_volid;
        $src_format = $1 if $src_path =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/;
    }

    if (!$src_path) {
        die "source '$src_volid' is not a valid volid nor path for qemu-img convert\n";
    }

    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);
    my $dst_format = qemu_img_format($dst_scfg, $dst_volname);
    my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
    my $dst_is_iscsi = ($dst_path =~ m|^iscsi://|);

    my @convert_cmd = ('/usr/bin/qemu-img', 'convert', '-p', '-n');

    if ($snapname && $src_format && $src_format eq "qcow2") {
        push @convert_cmd, '-l', "snapshot.name=$snapname";
    }
    if ($dst_scfg->{type} eq 'zfspool') {
        push @convert_cmd, '-t', 'none';
    }
    if (defined($cachemode)) {
        push @convert_cmd, '-T', $cachemode;
    }

    if ($src_is_iscsi) {
        push @convert_cmd, '--image-opts';
        $src_path = convert_iscsi_path($src_path);
    } elsif ($src_format) {
        push @convert_cmd, '-f', $src_format;
    }

    if ($dst_is_iscsi) {
        push @convert_cmd, '--target-image-opts';
        $dst_path = convert_iscsi_path($dst_path);
    } else {
        push @convert_cmd, '-O', $dst_format;
    }

    my $use_zeroinit = !$dst_is_iscsi && $is_zero_initialized;
    push @convert_cmd, $src_path, ($use_zeroinit ? "zeroinit:$dst_path" : $dst_path);

    # turn the "(NN.NN/100%)" progress lines into a human readable message
    my $report_progress = sub {
        my ($line) = @_;

        return if $line !~ m/\((\S+)\/100\%\)/;

        my $percent = $1;
        my $transferred = int($size * $percent / 100);
        my $total_h = render_bytes($size, 1);
        my $transferred_h = render_bytes($transferred, 1);

        print "transferred $transferred_h of $total_h ($percent%)\n";
    };

    eval { run_command(\@convert_cmd, timeout => undef, outfunc => $report_progress); };
    if (my $convert_err = $@) {
        die "copy failed: $convert_err";
    }
}
# Determine the image format for a volume name: the file extension if the
# storage config has a 'path' and the extension matches QEMU_FORMAT_RE,
# otherwise "raw".
sub qemu_img_format {
    my ($scfg, $volname) = @_;

    return "raw" if !$scfg->{path};

    return $volname =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/ ? $1 : "raw";
}
2013-05-02 18:18:03 +02:00
# Start a 'drive-mirror' job for "drive-$drive" of VM $vmid and hand over to
# qemu_drive_mirror_monitor.
#
# Parameters:
#   $vmid, $drive        - VM ID and drive name (without the 'drive-' prefix)
#   $dst_volid           - target volume ID, or an 'nbd:' URI used as-is
#   $vmiddst, $completion, $qga - passed through to qemu_drive_mirror_monitor
#   $is_zero_initialized - if true, a non-NBD target gets the 'zeroinit:' prefix
#   $jobs                - hash ref of jobs, the new job is added to it
#   $bwlimit             - optional limit, multiplied by 1024 for 'speed'
#   $src_bitmap          - optional bitmap name, switches sync to 'incremental'
#
# If starting the job fails, the jobs in $jobs are cancelled and the sub dies
# with "mirroring error: ...".
sub qemu_drive_mirror {
    my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $completion, $qga, $bwlimit, $src_bitmap) = @_;

    $jobs = {} if !$jobs;

    my $device = "drive-$drive";
    $jobs->{$device} = {};

    my ($qemu_target, $format);
    if ($dst_volid =~ /^nbd:/) {
        $qemu_target = $dst_volid;
        $format = "nbd";
    } else {
        my $storecfg = PVE::Storage::config();
        my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid);
        my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

        $format = qemu_img_format($dst_scfg, $dst_volname);

        my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
        if ($is_zero_initialized) {
            $qemu_target = "zeroinit:$dst_path";
        } else {
            $qemu_target = $dst_path;
        }
    }

    my $opts = {
        timeout => 10,
        device => $device,
        mode => "existing",
        sync => "full",
        target => $qemu_target,
    };
    $opts->{format} = $format if $format;

    if (defined($src_bitmap)) {
        $opts->{sync} = 'incremental';
        $opts->{bitmap} = $src_bitmap;
        print "drive mirror re-using dirty bitmap '$src_bitmap'\n";
    }

    if (!defined($bwlimit)) {
        print "drive mirror is starting for drive-$drive\n";
    } else {
        $opts->{speed} = $bwlimit * 1024;
        print "drive mirror is starting for drive-$drive with bandwidth limit: ${bwlimit} KB/s\n";
    }

    # if a job already runs for this device we get an error, catch it for cleanup
    eval { mon_cmd($vmid, "drive-mirror", %$opts); };
    my $start_err = $@;
    if ($start_err) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        warn "$@\n" if $@;
        die "mirroring error: $start_err\n";
    }

    qemu_drive_mirror_monitor($vmid, $vmiddst, $jobs, $completion, $qga);
}
2020-03-18 08:21:29 +01:00
# Poll the block jobs listed in $jobs once per second until they are ready (or gone) and
# finish them according to $completion.
#
# Parameters:
#   $vmid       - VM whose QMP monitor is used for all commands
#   $vmiddst    - target VM id; if set and different from $vmid, ready jobs are cancelled
#                 instead of completed (the disk is not switched over), with the guest
#                 filesystems frozen or the VM suspended around the cancel
#   $jobs       - hash ref keyed by job (device) id; entries get their 'ready'/'complete'
#                 flags updated and are deleted once the job is finished
#   $completion - see below, defaults to 'complete'
#   $qga        - if true and the guest agent is running, use fsfreeze/thaw instead of
#                 suspend/resume when cancelling for a different target VM
#   $op         - block job type to look at, defaults to 'mirror'
#
# $completion can be either
# 'complete': wait until all jobs are ready, block-job-complete them (default)
# 'cancel': wait until all jobs are ready, block-job-cancel them
# 'skip': wait until all jobs are ready, return with block jobs in ready state
# 'auto': wait until all jobs disappear, only use for jobs which complete automatically
#
# On any error the remaining jobs are cancelled and the error is re-thrown prefixed with
# "block job ($op) error: ".
sub qemu_drive_mirror_monitor {
    my ($vmid, $vmiddst, $jobs, $completion, $qga, $op) = @_;

    $completion //= 'complete';
    $op //= "mirror";

    eval {
        # number of failed completion attempts ("cannot be completed"), bounded below
        my $err_complete = 0;

        my $starttime = time();
        while (1) {
            die "block job ('$op') timed out\n" if $err_complete > 300;

            my $stats = mon_cmd($vmid, "query-block-jobs");
            my $ctime = time();

            # index the currently running jobs of the requested type by device
            my $running_jobs = {};
            for my $stat (@$stats) {
                next if $stat->{type} ne $op;
                $running_jobs->{$stat->{device}} = $stat;
            }

            my $readycounter = 0;

            for my $job_id (sort keys %$jobs) {
                my $job = $running_jobs->{$job_id};

                my $vanished = !defined($job);
                my $complete = defined($jobs->{$job_id}->{complete}) && $vanished;
                if ($complete || ($vanished && $completion eq 'auto')) {
                    print "$job_id: $op-job finished\n";
                    delete $jobs->{$job_id};
                    next;
                }

                # a job that disappeared without us completing it was cancelled
                die "$job_id: '$op' has been cancelled\n" if !defined($job);

                my $busy = $job->{busy};
                my $ready = $job->{ready};
                if (my $total = $job->{len}) {
                    my $transferred = $job->{offset} || 0;
                    my $remaining = $total - $transferred;
                    my $percent = sprintf "%.2f", ($transferred * 100 / $total);

                    my $duration = $ctime - $starttime;
                    my $total_h = render_bytes($total, 1);
                    my $transferred_h = render_bytes($transferred, 1);

                    # pass all values as arguments so they can never be misread as
                    # format directives
                    my $status = sprintf(
                        "transferred %s of %s (%s%%) in %s",
                        $transferred_h,
                        $total_h,
                        $percent,
                        render_duration($duration),
                    );

                    if ($ready) {
                        if ($busy) {
                            $status .= ", still busy"; # shouldn't even happen? but mirror is weird
                        } else {
                            $status .= ", ready";
                        }
                    }
                    # only log progress until the job was seen ready once
                    print "$job_id: $status\n" if !$jobs->{$job_id}->{ready};
                    $jobs->{$job_id}->{ready} = $ready;
                }

                $readycounter++ if $job->{ready};
            }

            last if scalar(keys %$jobs) == 0;

            if ($readycounter == scalar(keys %$jobs)) {
                print "all '$op' jobs are ready\n";

                # do the complete later (or has already been done)
                last if $completion eq 'skip' || $completion eq 'auto';

                if ($vmiddst && $vmiddst != $vmid) {
                    my $agent_running = $qga && qga_check_running($vmid);
                    if ($agent_running) {
                        print "freeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-freeze"); };
                        warn $@ if $@;
                    } else {
                        print "suspend vm\n";
                        eval { PVE::QemuServer::vm_suspend($vmid, 1); };
                        warn $@ if $@;
                    }

                    # if we clone a disk for a new target vm, we don't switch the disk
                    PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs);

                    if ($agent_running) {
                        print "unfreeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-thaw"); };
                        warn $@ if $@;
                    } else {
                        print "resume vm\n";
                        eval { PVE::QemuServer::vm_resume($vmid, 1, 1); };
                        warn $@ if $@;
                    }

                    last;
                } else {

                    for my $job_id (sort keys %$jobs) {
                        # try to switch the disk if source and destination are on the same guest
                        print "$job_id: Completing block job_id...\n";

                        # named differently from $op (the job type) to avoid shadowing it
                        my $completion_command;
                        if ($completion eq 'complete') {
                            $completion_command = 'block-job-complete';
                        } elsif ($completion eq 'cancel') {
                            $completion_command = 'block-job-cancel';
                        } else {
                            die "invalid completion value: $completion\n";
                        }

                        eval { mon_cmd($vmid, $completion_command, device => $job_id) };
                        my $completion_err = $@;
                        if ($completion_err && $completion_err =~ m/cannot be completed/) {
                            print "$job_id: block job cannot be completed, trying again.\n";
                            $err_complete++;
                        } elsif ($completion_err) {
                            # any other failure must not be reported as success
                            chomp $completion_err;
                            die "$job_id: block job cannot be completed - $completion_err\n";
                        } else {
                            print "$job_id: Completed successfully.\n";
                            $jobs->{$job_id}->{complete} = 1;
                        }
                    }
                }
            }
            sleep 1;
        }
    };
    my $err = $@;

    if ($err) {
        # best-effort cleanup; report, but do not mask the original error
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        warn "$@\n" if $@;
        die "block job ($op) error: $err";
    }
}
# Cancel every block job listed in $jobs (hash ref keyed by job/device id) on VM $vmid and
# block until none of them shows up in 'query-block-jobs' anymore. Finished entries are
# removed from $jobs. Errors of the individual cancel commands are ignored.
sub qemu_blockjobs_cancel {
    my ($vmid, $jobs) = @_;

    # request cancellation for all jobs first ...
    for my $job_id (keys %$jobs) {
        print "$job_id: Cancelling block job\n";
        eval { mon_cmd($vmid, "block-job-cancel", device => $job_id); };
        $jobs->{$job_id}->{cancel} = 1;
    }

    # ... then poll once per second until all of them have vanished
    while (1) {
        my $stats = mon_cmd($vmid, "query-block-jobs");

        my %still_running;
        for my $stat (@$stats) {
            $still_running{$stat->{device}} = $stat;
        }

        for my $job_id (keys %$jobs) {
            my $cancel_requested = defined($jobs->{$job_id}->{cancel});
            next if !$cancel_requested || defined($still_running{$job_id});

            print "$job_id: Done.\n";
            delete $jobs->{$job_id};
        }

        last if scalar(keys %$jobs) == 0;

        sleep 1;
    }
}
fix #4525: clone disk: disallow mirror if it might cause problems with io_uring
The target of the drive-mirror operation is opened with (essentially)
the same flags as the source in QEMU, in particular whether io_uring
should be used is inherited.
But io_uring currently causes problems in combination with certain
storage types, sometimes even leading to crashes (LVM with Linux 6.1).
Just disallow live cloning of drives when the source uses io_uring and
the target storage is not ready for it. There is one exception, namely
when source and target storage are the same. In that case, just assume
it will keep working for the target.
Migration does not seem to be affected, because there, the target VM
opens the images with the checked aio setting and then NBD exports of
those are used as the targets for mirroring.
It can be that the default determined for the source is not what's
actually used, because after a drive-mirror to a storage with a
different default, it will still use the default from the old storage.
Unfortunately, aio doesn't seem to be part of the 'query-block' QMP
command's result, so just tolerate this edge case.
The check can be removed if either
1. drive-mirror learns to open the target with a different aio setting
or more ideally
2. there are no more bad storages for io_uring.
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
2023-02-10 15:19:12 +01:00
# Check for bug #4525: drive-mirror will open the target drive with the same aio setting as the
# source, but some storages have problems with io_uring, sometimes even leading to crashes.
#
# Dies if the source drive uses io_uring (explicitly via its 'aio' setting, or by the default
# of its storage) while the target storage does not allow io_uring as default. Does nothing
# when drive-mirror is not used or the storage stays the same.
my sub clone_disk_check_io_uring {
    my ($src_drive, $storecfg, $src_storeid, $dst_storeid, $use_drive_mirror) = @_;

    return if !$use_drive_mirror;

    # Don't complain when not changing storage.
    # Assume if it works for the source, it'll work for the target too.
    return if $src_storeid eq $dst_storeid;

    my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

    my $cache_direct = drive_uses_cache_direct($src_drive);

    # an explicit aio setting on the drive wins over the storage default
    my $src_uses_io_uring = $src_drive->{aio}
        ? $src_drive->{aio} eq 'io_uring'
        : storage_allows_io_uring_default($src_scfg, $cache_direct);

    die "target storage is known to cause issues with aio=io_uring (used by current drive)\n"
        if $src_uses_io_uring && !storage_allows_io_uring_default($dst_scfg, $cache_direct);
}
2013-05-29 08:32:10 +02:00
# Create a full or linked clone of a single drive.
#
# Parameters:
#   $storecfg   - storage configuration
#   $source     - hash ref with the keys vmid, running, drivename, drive, snapname
#   $dest       - hash ref with the keys vmid, drivename, efisize, storage, format
#   $full       - true for a full clone, false for a linked clone
#   $newvollist - array ref, every newly allocated volume ID is pushed onto it
#   $jobs, $completion, $qga, $bwlimit - passed on to the drive-mirror helpers
#
# Returns a deep copy of the source drive whose 'file' points to the new volume, whose
# 'format' is removed and whose 'size' is updated if it could be determined.
sub clone_disk {
    my ($storecfg, $source, $dest, $full, $newvollist, $jobs, $completion, $qga, $bwlimit) = @_;

    my ($vmid, $running) = $source->@{qw(vmid running)};
    my ($src_drivename, $drive, $snapname) = $source->@{qw(drivename drive snapname)};

    my ($newvmid, $dst_drivename, $efisize) = $dest->@{qw(vmid drivename efisize)};
    my ($storage, $format) = $dest->@{qw(storage format)};

    # live (online) copy is only possible for an attached drive without snapshot
    my $use_drive_mirror = $full && $running && $src_drivename && !$snapname;

    if ($src_drivename && $dst_drivename && $src_drivename ne $dst_drivename) {
        die "cloning from/to EFI disk requires EFI disk\n"
            if $src_drivename eq 'efidisk0' || $dst_drivename eq 'efidisk0';
        die "cloning from/to TPM state requires TPM state\n"
            if $src_drivename eq 'tpmstate0' || $dst_drivename eq 'tpmstate0';

        # This would lead to two device nodes in QEMU pointing to the same backing image!
        die "cannot change drive name when cloning disk from/to the same VM\n"
            if $use_drive_mirror && $vmid == $newvmid;
    }

    die "cannot move TPM state while VM is running\n"
        if $use_drive_mirror && $src_drivename eq 'tpmstate0';

    my $newvolid;

    my $clone_kind = $full ? 'full' : 'linked';
    print "create $clone_kind clone of drive ";
    print "$src_drivename " if $src_drivename;
    print "($drive->{file})\n";

    if ($full) {
        my ($src_storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        my $storeid = $storage || $src_storeid;

        my $dst_format = resolve_dst_disk_format($storecfg, $storeid, $volname, $format);

        my $name = undef;
        my $size = undef;
        if (drive_is_cloudinit($drive)) {
            $name = "vm-$newvmid-cloudinit";
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $name .= ".$dst_format" if $scfg->{path};
            $snapname = undef;
            $size = PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE;
        } elsif ($dst_drivename eq 'efidisk0') {
            $size = $efisize or die "internal error - need to specify EFI disk size\n";
        } elsif ($dst_drivename eq 'tpmstate0') {
            $dst_format = 'raw';
            $size = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
        } else {
            clone_disk_check_io_uring($drive, $storecfg, $src_storeid, $storeid, $use_drive_mirror);

            ($size) = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
        }

        $newvolid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $newvmid, $dst_format, $name, ($size/1024)
        );
        push @$newvollist, $newvolid;

        PVE::Storage::activate_volumes($storecfg, [$newvolid]);

        if (drive_is_cloudinit($drive)) {
            # when cloning multiple disks (e.g. during clone_vm) it might be the last disk
            # if this is the case, we have to complete any block-jobs still there from
            # previous drive-mirrors
            if (($completion eq 'complete') && (scalar(keys %$jobs) > 0)) {
                qemu_drive_mirror_monitor($vmid, $newvmid, $jobs, $completion, $qga);
            }
            # no data is copied for the cloudinit drive, only the volume is allocated
        } else {
            my $sparseinit = PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $newvolid);

            if ($use_drive_mirror) {
                qemu_drive_mirror(
                    $vmid,
                    $src_drivename,
                    $newvolid,
                    $newvmid,
                    $sparseinit,
                    $jobs,
                    $completion,
                    $qga,
                    $bwlimit,
                );
            } elsif ($dst_drivename eq 'efidisk0') {
                # TODO: handle bwlimits
                # the relevant data on the efidisk may be smaller than the source
                # e.g. on RBD/ZFS, so we use dd to copy only the amount
                # that is given by the OVMF_VARS.fd
                my $src_path = PVE::Storage::path($storecfg, $drive->{file}, $snapname);
                my $dst_path = PVE::Storage::path($storecfg, $newvolid);

                my $src_format = (PVE::Storage::parse_volname($storecfg, $drive->{file}))[6];

                # better for Ceph if block size is not too small, see bug #3324
                my $bs = 1024*1024;

                my $cmd = ['qemu-img', 'dd', '-n', '-O', $dst_format];

                if ($src_format eq 'qcow2' && $snapname) {
                    die "cannot clone qcow2 EFI disk snapshot - requires QEMU >= 6.2\n"
                        if !min_version(kvm_user_version(), 6, 2);
                    push $cmd->@*, '-l', $snapname;
                }
                push $cmd->@*, "bs=$bs", "osize=$size", "if=$src_path", "of=$dst_path";
                run_command($cmd);
            } else {
                # TODO: handle bwlimits
                qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit);
            }
        }
    } else {
        $newvolid = PVE::Storage::vdisk_clone($storecfg, $drive->{file}, $newvmid, $snapname);
        push @$newvollist, $newvolid;
    }

    # failing to query the new size is not fatal, the old size is kept in that case
    my ($size) = eval { PVE::Storage::volume_size_info($storecfg, $newvolid, 10) };

    my $disk = dclone($drive);
    delete $disk->{format};
    $disk->{file} = $newvolid;
    $disk->{size} = $size if defined($size);

    return $disk;
}
2016-10-17 12:20:43 +02:00
# Ask the running QEMU instance of VM $vmid for its version via 'query-version' and return it
# as "<major>.<minor>".
sub get_running_qemu_version {
    my ($vmid) = @_;

    my $version_info = mon_cmd($vmid, "query-version");
    my $qemu = $version_info->{qemu};

    return "$qemu->{major}.$qemu->{minor}";
}
2015-11-06 10:27:05 +01:00
# Decide whether the old (non-EFI) PXE BIOS files have to be used for $machine_type.
#
# Returns nothing if no machine type is given, otherwise the list
# ($use_old_bios_files, $machine_type), where a trailing '.pxe' marker is stripped from the
# returned machine type.
sub qemu_use_old_bios_files {
    my ($machine_type) = @_;

    return if !$machine_type;

    # an explicit '.pxe' suffix always selects the old files
    if ($machine_type =~ m/^(\S+)\.pxe$/) {
        return (1, $1);
    }

    my $version = extract_version($machine_type, kvm_user_version());

    # Note: kvm version < 2.4 use non-efi pxe files, and have problems when we
    # load new efi bios files on migration. So this hack is required to allow
    # live migration from qemu-2.2 to qemu-2.4, which is sometimes used when
    # upgrading from proxmox-ve-3.X to proxmox-ve 4.0
    my $use_old_bios_files = !min_version($version, 2, 4);

    return ($use_old_bios_files, $machine_type);
}
2020-02-12 13:31:06 +01:00
# Return the size in bytes (as reported by -s) of the OVMF variables file selected for the VM
# config $conf. $efidisk is optional; if it is not defined, the parsed 'efidisk0' entry of
# $conf is used when present.
sub get_efivars_size {
    my ($conf, $efidisk) = @_;

    my $arch = get_vm_arch($conf);

    if (!defined($efidisk) && $conf->{efidisk0}) {
        $efidisk = parse_drive('efidisk0', $conf->{efidisk0});
    }

    my $smm = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my (undef, $ovmf_vars) = get_ovmf_files($arch, $efidisk, $smm);

    return -s $ovmf_vars;
}
# Set the 'size' property of the 'efidisk0' entry in $conf to the size returned by
# get_efivars_size(). Does nothing if $conf has no 'efidisk0'. Returns nothing.
sub update_efidisk_size {
    my ($conf) = @_;

    my $efidisk_conf = $conf->{efidisk0};
    return if !defined($efidisk_conf);

    my $disk = PVE::QemuServer::parse_drive('efidisk0', $efidisk_conf);
    $disk->{size} = get_efivars_size($conf);
    $conf->{efidisk0} = print_drive($disk);

    return;
}
fix #3075: add TPM v1.2 and v2.0 support via swtpm
Starts an instance of swtpm per VM in its systemd scope, it will
terminate by itself if the VM exits, or be terminated manually if
startup fails.
Before first use, a TPM state is created via swtpm_setup. State is
stored in a 'tpmstate0' volume, treated much the same way as an efidisk.
It is migrated 'offline', the important part here is the creation of the
target volume, the actual data transfer happens via the QEMU device
state migration process.
Move-disk can only work offline, as the disk is not registered with
QEMU, so 'drive-mirror' wouldn't work. swtpm itself has no method of
moving a backing storage at runtime.
For backups, a bit of a workaround is necessary (this may later be
replaced by NBD support in swtpm): During the backup, we attach the
backing file of the TPM as a read-only drive to QEMU, so our backup
code can detect it as a block device and back it up as such, while
ensuring consistency with the rest of disk state ("snapshot" semantic).
The name for the ephemeral drive is specifically chosen as
'drive-tpmstate0-backup', diverging from our usual naming scheme with
the '-backup' suffix, to avoid it ever being treated as a regular drive
from the rest of the stack in case it gets left over after a backup for
some reason (shouldn't happen).
Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
2021-10-04 17:29:20 +02:00
# Set the 'size' property of the 'tpmstate0' entry in $conf to the fixed TPM state size.
# Does nothing if $conf has no 'tpmstate0' (same guard as update_efidisk_size), so that no
# bogus entry is written into a config without TPM state.
sub update_tpmstate_size {
    my ($conf) = @_;

    return if !defined($conf->{tpmstate0});

    my $disk = PVE::QemuServer::parse_drive('tpmstate0', $conf->{tpmstate0});
    $disk->{size} = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
    $conf->{tpmstate0} = print_drive($disk);
}
2021-10-11 14:10:24 +02:00
# Allocate a new EFI vars volume on storage $storeid for VM $vmid in format $fmt and fill it
# with the OVMF variables template selected by get_ovmf_files($arch, $efidisk, $smm).
#
# Returns the list ($volid, $size), where $size is the size reported by the storage layer
# divided by 1024.
sub create_efidisk($$$$$$$) {
    my ($storecfg, $storeid, $vmid, $fmt, $arch, $efidisk, $smm) = @_;

    # only the vars file (second return value) is needed here
    my $ovmf_vars = (get_ovmf_files($arch, $efidisk, $smm))[1];

    my $vars_size_b = -s $ovmf_vars;
    my $vars_size = PVE::Tools::convert_size($vars_size_b, 'b' => 'kb');

    my $volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $fmt, undef, $vars_size);
    PVE::Storage::activate_volumes($storecfg, [$volid]);

    qemu_img_convert($ovmf_vars, $volid, $vars_size_b, undef, 0);

    my ($size) = PVE::Storage::volume_size_info($storecfg, $volid, 3);

    return ($volid, $size / 1024);
}
2015-03-19 11:06:12 +01:00
# Query the iothreads of VM $vmid via 'query-iothreads' and return a hash ref mapping each
# iothread 'id' to its 'thread-id'.
sub vm_iothreads_list {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, 'query-iothreads');

    my %thread_id_of;
    for my $entry (@$res) {
        my $id = $entry->{id};
        $thread_id_of{$id} = $entry->{"thread-id"};
    }

    return \%thread_id_of;
}
2015-03-27 03:41:54 +01:00
# Compute SCSI controller placement for $drive based on the 'scsihw' setting in $conf.
#
# Returns the list ($maxdev, $controller, $controller_prefix):
#   $maxdev            - devices per controller: 7 for unset or lsi* hardware, 1 for
#                        'virtio-scsi-single', 256 otherwise
#   $controller        - controller number, int($drive->{index} / $maxdev)
#   $controller_prefix - "virtioscsi" for 'virtio-scsi-single', "scsihw" otherwise
sub scsihw_infos {
    my ($conf, $drive) = @_;

    my $scsihw = $conf->{scsihw};
    my $is_single = $scsihw && $scsihw eq 'virtio-scsi-single';

    my $maxdev = 256;
    if (!$scsihw || $scsihw =~ m/^lsi/) {
        $maxdev = 7;
    } elsif ($is_single) {
        $maxdev = 1;
    }

    my $controller = int($drive->{index} / $maxdev);
    my $controller_prefix = $is_single ? "virtioscsi" : "scsihw";

    return ($maxdev, $controller, $controller_prefix);
}
2015-03-27 06:15:01 +01:00
2017-06-01 10:26:37 +02:00
# Determine the image format to use for a new disk on storage $storeid.
#
# If $format is not given, the format derived from $src_volname is used as hint; without a
# source volume name the storage's default format is returned directly. A format that is not
# among the storage's valid formats is replaced by the storage default.
sub resolve_dst_disk_format {
    my ($storecfg, $storeid, $src_volname, $format) = @_;

    my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);

    if (!$format) {
        return $defFormat if !$src_volname;

        # if no target format is specified, use the source disk format as hint
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        $format = qemu_img_format($scfg, $src_volname);
    }

    # test if requested format is supported - else use default
    my $supported = grep { $_ eq $format } @$validFormats;

    return $supported ? $format : $defFormat;
}
2019-12-09 15:26:58 +01:00
# NOTE: if this logic changes, please update docs & possibly gui logic
#
# Pick the storage for the VM state. Order of preference:
#   1. 'vmstatestorage' from $conf, if set
#   2. a shared storage on which the VM has at least one disk
#   3. a non-shared storage on which the VM has at least one disk
#   4. the storage named 'local'
# Within 2. and 3. a storage with a 'path' (file based) replaces an earlier candidate.
sub find_vmstate_storage {
    my ($conf, $storecfg) = @_;

    return $conf->{vmstatestorage} if $conf->{vmstatestorage};

    my ($shared, $local);

    foreach_storage_used_by_vm($conf, sub {
        my ($sid) = @_;
        my $scfg = PVE::Storage::storage_config($storecfg, $sid);

        # prefer file based storage
        if ($scfg->{shared}) {
            $shared = $sid if !$shared || $scfg->{path};
        } else {
            $local = $sid if !$local || $scfg->{path};
        }
    });

    return $shared // $local // 'local';
}
2018-09-19 11:35:11 +02:00
# Generate a new UUID with the UUID module and return its string representation.
sub generate_uuid {
    # both calls fill the variable passed as (last) argument
    UUID::generate(my $uuid);
    UUID::unparse($uuid, my $uuid_str);

    return $uuid_str;
}
# Return a freshly generated UUID as 'uuid=<uuid>' property string.
sub generate_smbios1_uuid {
    my $uuid = generate_uuid();
    return "uuid=$uuid";
}
2018-11-09 16:11:18 +01:00
# Send 'nbd-server-stop' to the monitor of VM $vmid and return the command's result.
sub nbd_stop {
    my ($vmid) = @_;

    return mon_cmd($vmid, 'nbd-server-stop');
}
2019-09-11 14:07:42 +02:00
# Create the (empty) reboot trigger file /run/qemu-server/<vmid>.reboot. Dies if the file
# cannot be opened for writing.
sub create_reboot_request {
    my ($vmid) = @_;

    my $trigger_file = "/run/qemu-server/$vmid.reboot";

    open(my $fh, '>', $trigger_file)
        or die "failed to create reboot trigger file: $!\n";
    close($fh);
}
# Remove the reboot trigger file /run/qemu-server/<vmid>.reboot.
#
# Returns the result of unlink(), i.e. true if a trigger file was removed and false if there
# was none. Dies if the removal fails for any reason other than the file not existing.
sub clear_reboot_request {
    my ($vmid) = @_;

    my $path = "/run/qemu-server/$vmid.reboot";

    my $removed = unlink($path);
    if (!$removed && $! != POSIX::ENOENT) {
        die "could not remove reboot request for $vmid: $!";
    }

    return $removed;
}
2020-10-06 15:32:14 +02:00
# Translate a legacy boot string (one letter per device class) into a hash ref mapping
# device names to boot indices.
#
# The legacy string is taken from $bootcfg->{legacy}, falling back to the schema default.
# The class at position N (1-based) of the string starts at index N*100, and the index
# increases by one per device of that class:
#   d - CD-ROM drives
#   c - hard disks; only the drive named by $conf->{bootdisk} gets an entry, but every
#       disk advances the index
#   n - configured network devices
sub bootorder_from_legacy {
    my ($conf, $bootcfg) = @_;

    my $boot = $bootcfg->{legacy} || $boot_fmt->{legacy}->{default};

    # next free boot index per device class letter
    my %next_index;
    my $position = 0;
    for my $letter (split(//, $boot)) {
        $position++;
        $next_index{$letter} = $position * 100;
    }

    my $bootorder = {};

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        if (drive_is_cdrom($drive, 1)) {
            if ($next_index{d}) {
                $bootorder->{$ds} = $next_index{d};
                $next_index{d}++;
            }
        } elsif ($next_index{c}) {
            $bootorder->{$ds} = $next_index{c}
                if $conf->{bootdisk} && $conf->{bootdisk} eq $ds;
            $next_index{c}++;
        }
    });

    if ($next_index{n}) {
        for my $net_id (0 .. $MAX_NETS - 1) {
            my $netname = "net$net_id";
            next if !$conf->{$netname};
            $bootorder->{$netname} = $next_index{n};
            $next_index{n}++;
        }
    }

    return $bootorder;
}
# Generate default device list for 'boot: order=' property. Matches legacy
# default boot order, but with explicit device names. This is important, since
# the fallback for when neither 'order' nor the old format is specified relies
# on 'bootorder_from_legacy' above, and it would be confusing if this diverges.
#
# Returns an array ref with, in this order and each only if present: the first hard disk,
# the first CD-ROM and the first configured network device.
sub get_default_bootdevices {
    my ($conf) = @_;

    my @devices;

    # harddisk (0) first, then cdrom (1)
    for my $want_cdrom (0, 1) {
        my $first = PVE::QemuServer::Drive::resolve_first_disk($conf, $want_cdrom);
        push @devices, $first if $first;
    }

    # network: only the first configured device
    for my $net_id (0 .. $MAX_NETS - 1) {
        my $netname = "net$net_id";
        if ($conf->{$netname}) {
            push @devices, $netname;
            last;
        }
    }

    return \@devices;
}
2020-10-16 16:52:11 +02:00
# Return a hash ref mapping device names to boot indices for the VM config $conf.
#
# Without a 'boot' property, or with a legacy-style one, the order is derived by
# bootorder_from_legacy(). With 'order=', the listed devices are numbered consecutively.
sub device_bootorder {
    my ($conf) = @_;

    return bootorder_from_legacy($conf) if !defined($conf->{boot});

    my $boot = parse_property_string($boot_fmt, $conf->{boot});

    if (!defined($boot) || $boot->{legacy}) {
        return bootorder_from_legacy($conf, $boot);
    }

    my $bootorder = {};
    if ($boot->{order}) {
        my $index = 100; # start at 100 to allow user to insert devices before us with -args
        for my $dev (PVE::Tools::split_list($boot->{order})) {
            $bootorder->{$dev} = $index;
            $index++;
        }
    }

    return $bootorder;
}
2021-03-03 10:56:10 +01:00
# Connect to the qmeventd socket and send the vzdump handshake for VM $vmid.
#
# The connection is attempted up to five times, sleeping 25ms between attempts, as long as
# the failure is EINTR or EAGAIN; any other error is fatal immediately.
#
# Returns the socket handle, which the caller has to close once the inhibit is no longer
# required.
sub register_qmeventd_handle {
    my ($vmid) = @_;

    my $peer = "/var/run/qmeventd.sock";

    my $fh;
    my $attempts = 0;
    while (1) {
        $attempts++;

        $fh = IO::Socket::UNIX->new(Peer => $peer, Blocking => 0, Timeout => 1);
        last if $fh;

        die "unable to connect to qmeventd socket (vmid: $vmid) - $!\n"
            if $! != EINTR && $! != EAGAIN;

        die "unable to connect to qmeventd socket (vmid: $vmid) - timeout after $attempts retries\n"
            if $attempts > 4;

        usleep(25000);
    }

    # send handshake to mark VM as backing up
    print $fh to_json({vzdump => {vmid => "$vmid"}});

    # return handle to be closed later when inhibit is no longer required
    return $fh;
}
2015-09-06 16:01:59 +02:00
# bash completion helper
#
# Returns an array ref with the volume IDs of all backup archives in (optionally compressed)
# 'vma' format. If the current value $cvalue starts with '<storage>:', only that storage is
# searched.
sub complete_backup_archives {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    # stays undef when no storage prefix was typed yet
    my ($storeid) = $cvalue =~ m/^([^:]+):/;

    my $data = PVE::Storage::template_list($cfg, $storeid, 'backup');

    my $res = [];
    for my $items (values %$data) {
        for my $item (@$items) {
            next if $item->{format} !~ m/^vma\.(${\PVE::Storage::Plugin::COMPRESSOR_RE})$/;
            next if !defined($item->{volid});
            push @$res, $item->{volid};
        }
    }

    return $res;
}
# Completion helper returning an array ref of VM IDs from vmstatus().
#   $running undefined - all VMs
#   $running true      - only running VMs (templates excluded)
#   $running false     - only VMs that are not running (templates excluded)
my $complete_vmid_full = sub {
    my ($running) = @_;

    my $idlist = vmstatus();

    my $wanted = sub {
        my ($d) = @_;

        return 1 if !defined($running);
        return 0 if $d->{template};

        my $is_running = $d->{status} eq 'running';
        return $running ? $is_running : !$is_running;
    };

    return [ grep { $wanted->($idlist->{$_}) } keys %$idlist ];
};
# bash completion helper: all VMIDs, without filtering on status or template.
sub complete_vmid {
    return $complete_vmid_full->();
}
# bash completion helper: VMIDs of non-template VMs that are not running.
sub complete_vmid_stopped {
    return $complete_vmid_full->(0);
}
# bash completion helper: VMIDs of non-template VMs that are running.
sub complete_vmid_running {
    return $complete_vmid_full->(1);
}
2015-09-07 08:13:07 +02:00
# bash completion helper: IDs of all storages that pass
# PVE::Storage::storage_check_enabled (no explicit node given) and have the
# 'images' content type configured.
#
# Returns an array reference of storage IDs.
sub complete_storage {
    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @usable = grep {
        my $sid = $_;
        PVE::Storage::storage_check_enabled($cfg, $sid, undef, 1)
            && $ids->{$sid}->{content}->{images};
    } keys %$ids;

    return \@usable;
}
2019-11-18 15:23:18 +01:00
# bash completion helper: IDs of all storages that pass
# PVE::Storage::storage_check_enabled for the migration target node and have
# the 'images' content type configured.
#
# Parameters follow the completion callback convention
# ($cmd, $param, $current_value, $all_args); only $all_args is used. Its
# second element is taken as the target node - presumably the second
# positional argument of the command line being completed, verify against
# the caller.
#
# Returns an array reference of storage IDs.
sub complete_migration_storage {
    my ($cmd, $param, $current_value, $all_args) = @_;

    # plain element access instead of a one-element array slice
    my $targetnode = $all_args->[1];

    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my $res = [];
    for my $sid (keys %$ids) {
        next if !PVE::Storage::storage_check_enabled($cfg, $sid, $targetnode, 1);
        next if !$ids->{$sid}->{content}->{images};
        push @$res, $sid;
    }

    return $res;
}
2021-01-20 13:32:04 +01:00
# Check whether the QMP 'query-status' command reports the VM as "paused".
#
# Any error raised by the config-existence assertion or by the monitor query
# is caught and printed as a warning; a false value is returned in that case.
sub vm_is_paused {
    my ($vmid) = @_;

    my $status_info = eval {
        PVE::QemuConfig::assert_config_exists_on_node($vmid);
        mon_cmd($vmid, "query-status");
    };
    if ($@) {
        warn "$@\n";
    }

    # no usable answer from the monitor, hand back the false value as is
    return $status_info if !$status_info;

    return $status_info->{status} eq "paused";
}
2021-06-22 14:30:30 +02:00
# Verify that the storage holding volume $vol has the volume's content type
# enabled in its configuration.
#
# Dies if the content type is not enabled; errors from the PVE::Storage
# parsing and lookup helpers propagate as well. Returns 1 otherwise.
sub check_volume_storage_type {
    my ($storecfg, $vol) = @_;

    # only the storage ID part of the volume ID is needed here
    my ($storeid) = PVE::Storage::parse_volume_id($vol);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

    my ($vtype) = PVE::Storage::parse_volname($storecfg, $vol);

    if (!$scfg->{content}->{$vtype}) {
        die "storage '$storeid' does not support content-type '$vtype'\n";
    }

    return 1;
}
2022-08-24 18:26:43 +02:00
# Add the static MAC address of every 'netN' device in $conf to the
# forwarding database (FDB) of the bridge it is attached to.
#
# $conf - VM configuration hash, only the netN keys are looked at
# $vmid - VM ID, used to derive the tap interface name "tap<vmid>i<N>"
#
# Devices that cannot be parsed, have no MAC address or have no bridge
# configured are skipped. With SDN available the SDN zones helper is used;
# otherwise the entry is only added if the bridge has a
# /sys/class/net/<bridge>/bridge directory.
sub add_nets_bridge_fdb {
    my ($conf, $vmid) = @_;

    for my $opt (keys %$conf) {
        next if $opt !~ m/^net(\d+)$/;
        my $iface = "tap${vmid}i$1";

        # NOTE: expect setups with learning off to *not* use auto-random-generation of MAC on start
        my $net = parse_net($conf->{$opt}, 1) or next;

        my $mac = $net->{macaddr};
        if (!$mac) {
            # only warn if the first line of the port's 'learning' attribute
            # is false (e.g. "0"); presumably that means learning is off
            log_warn("MAC learning disabled, but vNIC '$iface' has no static MAC to add to forwarding DB!")
                if !file_read_firstline("/sys/class/net/$iface/brport/learning");
            next;
        }

        # a NIC without a bridge has no FDB to manage; skipping also avoids
        # interpolating an undefined value into the sysfs path below
        my $bridge = $net->{bridge} or next;

        if ($have_sdn) {
            PVE::Network::SDN::Zones::add_bridge_fdb($iface, $mac, $bridge, $net->{firewall});
        } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now
            PVE::Network::add_bridge_fdb($iface, $mac, $net->{firewall});
        }
    }
}
2022-11-13 13:37:45 +01:00
2022-08-24 18:26:44 +02:00
# Remove the MAC address of every 'netN' device in $conf from the forwarding
# database (FDB) of the bridge it is attached to.
#
# $conf - VM configuration hash, only the netN keys are looked at
# $vmid - VM ID, used to derive the tap interface name "tap<vmid>i<N>"
#
# Devices that cannot be parsed, have no MAC address or have no bridge
# configured are skipped. With SDN available the SDN zones helper is used;
# otherwise the entry is only removed if the bridge has a
# /sys/class/net/<bridge>/bridge directory.
sub del_nets_bridge_fdb {
    my ($conf, $vmid) = @_;

    for my $opt (keys %$conf) {
        next if $opt !~ m/^net(\d+)$/;
        my $iface = "tap${vmid}i$1";

        # NOTE(review): unlike add_nets_bridge_fdb, parse_net is called
        # without a second argument here - confirm that this is intended
        my $net = parse_net($conf->{$opt}) or next;
        my $mac = $net->{macaddr} or next;

        # a NIC without a bridge has no FDB to manage; skipping also avoids
        # interpolating an undefined value into the sysfs path below
        my $bridge = $net->{bridge} or next;

        if ($have_sdn) {
            PVE::Network::SDN::Zones::del_bridge_fdb($iface, $mac, $bridge, $net->{firewall});
        } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now
            PVE::Network::del_bridge_fdb($iface, $mac, $net->{firewall});
        }
    }
}
2011-08-23 07:47:04 +02:00
1 ;