5
0
mirror of git://git.proxmox.com/git/qemu-server.git synced 2025-01-06 13:17:56 +03:00

enable cluster mapped PCI devices for guests

This patch allows configuring PCI devices that are mapped via cluster
resource mapping when the user has 'Mapping.Use' on the ACL path
'/mapping/pci/{ID}' (in addition to the usual required VM config
privileges).

When given multiple mappings in the config, we use them as alternatives
for the passthrough, and will select the first free one on startup.
It is using our regular pci reservation mechanism for regular devices and
we introduce a selection mechanism for mediated devices.

A few changes to the inner workings were required to make this work well:
* parse_hostpci now returns a different structure where we have a list
  of lists (first level is for the different alternatives and second
  level is for the different devices that should be passed through
  together)
* factor out the 'parse_hostpci_devices' which parses each device from
  the config and does some precondition checks
* reserve_pci_usage now behaves slightly differently when trying to
  reserve a device that is already reserved for the same VMID,
  since for checking which alternative we can use, we already must
  reserve one (this means that qm showcmd can actually reserve devices,
  albeit only for up to 10 seconds)
* configuring a mediated device on a multifunction device is not
  supported anymore, and results in failure to start (previously, it
  just chose the first device to do it). This is a breaking change
* configuring a single pci device twice on different hostpci slots now
  fails during commandline generation instead of on qemu start, so we had
  to adapt one test where this occurred (it could never have worked
  anyway)
* we now check permissions during clone/restore, meaning raw/real
  devices can only be cloned/restored by root@pam from now on.
  this is a breaking change.

Fixes #3574: Improve SR-IOV usability
Signed-off-by: Dominik Csapak <d.csapak@proxmox.com>
Tested-By:  Markus Frank <m.frank@proxmox.com>
This commit is contained in:
Dominik Csapak 2023-06-16 15:05:23 +02:00 committed by Thomas Lamprecht
parent e3971865b4
commit 9b71c34d61
6 changed files with 310 additions and 75 deletions

View File

@ -32,7 +32,8 @@ use PVE::QemuServer::Drive;
use PVE::QemuServer::ImportDisk; use PVE::QemuServer::ImportDisk;
use PVE::QemuServer::Monitor qw(mon_cmd); use PVE::QemuServer::Monitor qw(mon_cmd);
use PVE::QemuServer::Machine; use PVE::QemuServer::Machine;
use PVE::QemuServer::USB qw(parse_usb_device); use PVE::QemuServer::PCI;
use PVE::QemuServer::USB;
use PVE::QemuMigrate; use PVE::QemuMigrate;
use PVE::RPCEnvironment; use PVE::RPCEnvironment;
use PVE::AccessControl; use PVE::AccessControl;
@ -616,6 +617,37 @@ my sub check_vm_create_usb_perm {
return 1; return 1;
}; };
# Checks whether $authuser may set the hostpci option $opt to $value.
#
# Parameters:
#   $rpcenv   - RPC environment object used for the privilege checks
#   $authuser - authenticated user id
#   $vmid     - guest id, passed through to check_vm_perm
#   $pool     - pool, passed through to check_vm_perm (may be undef)
#   $opt      - option name (e.g. 'hostpci0'), only used in the error message
#   $value    - property string in 'pve-qm-hostpci' format
#
# Returns 1 when access is allowed. Dies if a non-root user references a raw
# device via 'host', or if neither 'host' nor 'mapping' is set. The two
# $rpcenv calls presumably raise an exception on missing privileges - their
# implementation is not part of this file.
my sub check_hostpci_perm {
    my ($rpcenv, $authuser, $vmid, $pool, $opt, $value) = @_;

    # root@pam skips parsing and every privilege check
    return 1 if $authuser eq 'root@pam';

    my $parsed = PVE::JSONSchema::parse_property_string('pve-qm-hostpci', $value);

    # raw (non-mapped) host devices are reserved for root
    die "only root can set '$opt' config for non-mapped devices\n" if $parsed->{host};

    my $mapping_id = $parsed->{mapping}
        or die "either 'host' or 'mapping' must be set.\n";

    # a mapped device needs the mapping privilege plus the VM hardware privilege
    $rpcenv->check_full($authuser, "/mapping/pci/$mapping_id", ['Mapping.Use']);
    $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.HWType']);

    return 1;
}
# Runs check_hostpci_perm for every 'hostpciN' entry in the creation
# parameters $param (a hash reference of option name => value).
#
# Returns 1 if all checks pass; root@pam returns 1 immediately without
# looking at $param. Any exception from check_hostpci_perm propagates.
my sub check_vm_create_hostpci_perm {
    my ($rpcenv, $authuser, $vmid, $pool, $param) = @_;

    return 1 if $authuser eq 'root@pam';

    # only hostpci<N> keys are relevant here, everything else is ignored
    my @hostpci_opts = grep { m/^hostpci\d+$/ } keys %$param;

    for my $opt (@hostpci_opts) {
        check_hostpci_perm($rpcenv, $authuser, $vmid, $pool, $opt, $param->{$opt});
    }

    return 1;
};
my $check_vm_modify_config_perm = sub { my $check_vm_modify_config_perm = sub {
my ($rpcenv, $authuser, $vmid, $pool, $key_list) = @_; my ($rpcenv, $authuser, $vmid, $pool, $key_list) = @_;
@ -626,7 +658,7 @@ my $check_vm_modify_config_perm = sub {
# else, as there the permission can be value dependend # else, as there the permission can be value dependend
next if PVE::QemuServer::is_valid_drivename($opt); next if PVE::QemuServer::is_valid_drivename($opt);
next if $opt eq 'cdrom'; next if $opt eq 'cdrom';
next if $opt =~ m/^(?:unused|serial|usb)\d+$/; next if $opt =~ m/^(?:unused|serial|usb|hostpci)\d+$/;
next if $opt eq 'tags'; next if $opt eq 'tags';
@ -655,7 +687,7 @@ my $check_vm_modify_config_perm = sub {
# also needs privileges on the storage, that will be checked later # also needs privileges on the storage, that will be checked later
$rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Disk', 'VM.PowerMgmt' ]); $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Disk', 'VM.PowerMgmt' ]);
} else { } else {
# catches hostpci\d+, args, lock, etc. # catches args, lock, etc.
# new options will be checked here # new options will be checked here
die "only root can set '$opt' config\n"; die "only root can set '$opt' config\n";
} }
@ -894,6 +926,7 @@ __PACKAGE__->register_method({
&$check_vm_create_serial_perm($rpcenv, $authuser, $vmid, $pool, $param); &$check_vm_create_serial_perm($rpcenv, $authuser, $vmid, $pool, $param);
check_vm_create_usb_perm($rpcenv, $authuser, $vmid, $pool, $param); check_vm_create_usb_perm($rpcenv, $authuser, $vmid, $pool, $param);
check_vm_create_hostpci_perm($rpcenv, $authuser, $vmid, $pool, $param);
PVE::QemuServer::check_bridge_access($rpcenv, $authuser, $param); PVE::QemuServer::check_bridge_access($rpcenv, $authuser, $param);
&$check_cpu_model_access($rpcenv, $authuser, $param); &$check_cpu_model_access($rpcenv, $authuser, $param);
@ -1738,6 +1771,10 @@ my $update_vm_api = sub {
check_usb_perm($rpcenv, $authuser, $vmid, undef, $opt, $val); check_usb_perm($rpcenv, $authuser, $vmid, undef, $opt, $val);
PVE::QemuConfig->add_to_pending_delete($conf, $opt, $force); PVE::QemuConfig->add_to_pending_delete($conf, $opt, $force);
PVE::QemuConfig->write_config($vmid, $conf); PVE::QemuConfig->write_config($vmid, $conf);
} elsif ($opt =~ m/^hostpci\d+$/) {
check_hostpci_perm($rpcenv, $authuser, $vmid, undef, $opt, $val);
PVE::QemuConfig->add_to_pending_delete($conf, $opt, $force);
PVE::QemuConfig->write_config($vmid, $conf);
} elsif ($opt eq 'tags') { } elsif ($opt eq 'tags') {
assert_tag_permissions($vmid, $val, '', $rpcenv, $authuser); assert_tag_permissions($vmid, $val, '', $rpcenv, $authuser);
delete $conf->{$opt}; delete $conf->{$opt};
@ -1801,6 +1838,12 @@ my $update_vm_api = sub {
} }
check_usb_perm($rpcenv, $authuser, $vmid, undef, $opt, $param->{$opt}); check_usb_perm($rpcenv, $authuser, $vmid, undef, $opt, $param->{$opt});
$conf->{pending}->{$opt} = $param->{$opt}; $conf->{pending}->{$opt} = $param->{$opt};
} elsif ($opt =~ m/^hostpci\d+$/) {
if (my $oldvalue = $conf->{$opt}) {
check_hostpci_perm($rpcenv, $authuser, $vmid, undef, $opt, $oldvalue);
}
check_hostpci_perm($rpcenv, $authuser, $vmid, undef, $opt, $param->{$opt});
$conf->{pending}->{$opt} = $param->{$opt};
} elsif ($opt eq 'tags') { } elsif ($opt eq 'tags') {
assert_tag_permissions($vmid, $conf->{$opt}, $param->{$opt}, $rpcenv, $authuser); assert_tag_permissions($vmid, $conf->{$opt}, $param->{$opt}, $rpcenv, $authuser);
$conf->{pending}->{$opt} = PVE::GuestHelpers::get_unique_tags($param->{$opt}); $conf->{pending}->{$opt} = PVE::GuestHelpers::get_unique_tags($param->{$opt});

View File

@ -3724,8 +3724,8 @@ sub config_to_command {
my $bootorder = device_bootorder($conf); my $bootorder = device_bootorder($conf);
# host pci device passthrough # host pci device passthrough
my ($kvm_off, $gpu_passthrough, $legacy_igd) = PVE::QemuServer::PCI::print_hostpci_devices( my ($kvm_off, $gpu_passthrough, $legacy_igd, $pci_devices) = PVE::QemuServer::PCI::print_hostpci_devices(
$vmid, $conf, $devices, $vga, $winversion, $q35, $bridges, $arch, $machine_type, $bootorder); $vmid, $conf, $devices, $vga, $winversion, $bridges, $arch, $machine_type, $bootorder);
# usb devices # usb devices
my $usb_dev_features = {}; my $usb_dev_features = {};
@ -4144,7 +4144,7 @@ sub config_to_command {
push @$cmd, @$aa; push @$cmd, @$aa;
} }
return wantarray ? ($cmd, $vollist, $spice_port) : $cmd; return wantarray ? ($cmd, $vollist, $spice_port, $pci_devices) : $cmd;
} }
sub check_rng_source { sub check_rng_source {
@ -5699,7 +5699,7 @@ sub vm_start_nolock {
print "Resuming suspended VM\n"; print "Resuming suspended VM\n";
} }
my ($cmd, $vollist, $spice_port) = config_to_command($storecfg, $vmid, my ($cmd, $vollist, $spice_port, $pci_devices) = config_to_command($storecfg, $vmid,
$conf, $defaults, $forcemachine, $forcecpu, $params->{'pbs-backing'}); $conf, $defaults, $forcemachine, $forcecpu, $params->{'pbs-backing'});
my $migration_ip; my $migration_ip;
@ -5784,38 +5784,44 @@ sub vm_start_nolock {
my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $resume); my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $resume);
my $pci_devices = {}; # host pci devices my $pci_reserve_list = [];
for (my $i = 0; $i < $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES; $i++) { for my $device (values $pci_devices->%*) {
my $dev = $conf->{"hostpci$i"} or next; next if $device->{mdev}; # we don't reserve for mdev devices
$pci_devices->{$i} = parse_hostpci($dev); push $pci_reserve_list->@*, map { $_->{id} } $device->{ids}->@*;
} }
# do not reserve pciid for mediated devices, sysfs will error out for duplicate assignment
my $real_pci_devices = [ grep { !(defined($_->{mdev}) && scalar($_->{pciid}->@*) == 1) } values $pci_devices->%* ];
# map to a flat list of pci ids
my $pci_id_list = [ map { $_->{id} } map { $_->{pciid}->@* } $real_pci_devices->@* ];
# reserve all PCI IDs before actually doing anything with them # reserve all PCI IDs before actually doing anything with them
PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, $start_timeout); PVE::QemuServer::PCI::reserve_pci_usage($pci_reserve_list, $vmid, $start_timeout);
eval { eval {
my $uuid; my $uuid;
for my $id (sort keys %$pci_devices) { for my $id (sort keys %$pci_devices) {
my $d = $pci_devices->{$id}; my $d = $pci_devices->{$id};
for my $dev ($d->{pciid}->@*) { my ($index) = ($id =~ m/^hostpci(\d+)$/);
my $info = PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $id, $d->{mdev});
# nvidia grid needs the qemu parameter '-uuid' set my $chosen_mdev;
# use smbios uuid or mdev uuid as fallback for that for my $dev ($d->{ids}->@*) {
if ($d->{mdev} && !defined($uuid) && $info->{vendor} eq '10de') { my $info = eval { PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $index, $d->{mdev}) };
if (defined($conf->{smbios1})) { if ($d->{mdev}) {
my $smbios_conf = parse_smbios1($conf->{smbios1}); warn $@ if $@;
$uuid = $smbios_conf->{uuid} if defined($smbios_conf->{uuid}); $chosen_mdev = $info;
} last if $chosen_mdev; # if successful, we're done
$uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $id) if !defined($uuid); } else {
die $@ if $@;
} }
} }
next if !$d->{mdev};
die "could not create mediated device\n" if !defined($chosen_mdev);
# nvidia grid needs the uuid of the mdev as qemu parameter
if (!defined($uuid) && $chosen_mdev->{vendor} =~ m/^(0x)?10de$/) {
if (defined($conf->{smbios1})) {
my $smbios_conf = parse_smbios1($conf->{smbios1});
$uuid = $smbios_conf->{uuid} if defined($smbios_conf->{uuid});
}
$uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $index) if !defined($uuid);
}
} }
push @$cmd, '-uuid', $uuid if defined($uuid); push @$cmd, '-uuid', $uuid if defined($uuid);
}; };
@ -5929,7 +5935,7 @@ sub vm_start_nolock {
# re-reserve all PCI IDs now that we can know the actual VM PID # re-reserve all PCI IDs now that we can know the actual VM PID
my $pid = PVE::QemuServer::Helpers::vm_running_locally($vmid); my $pid = PVE::QemuServer::Helpers::vm_running_locally($vmid);
eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_id_list, $vmid, undef, $pid) }; eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_reserve_list, $vmid, undef, $pid) };
warn $@ if $@; warn $@ if $@;
if (defined($res->{migrate})) { if (defined($res->{migrate})) {
@ -6124,9 +6130,7 @@ sub cleanup_pci_devices {
# some nvidia vgpu driver versions want to clean the mdevs up themselves, and error # some nvidia vgpu driver versions want to clean the mdevs up themselves, and error
# out when we do it first. so wait for 10 seconds and then try it # out when we do it first. so wait for 10 seconds and then try it
my $pciid = $d->{pciid}->[0]->{id}; if ($d->{ids}->[0]->[0]->{vendor} =~ m/^(0x)?10de$/) {
my $info = PVE::SysFSTools::pci_device_info("$pciid");
if ($info->{vendor} eq '10de') {
sleep 10; sleep 10;
} }
@ -6487,6 +6491,15 @@ sub check_mapping_access {
} else { } else {
die "either 'host' or 'mapping' must be set.\n"; die "either 'host' or 'mapping' must be set.\n";
} }
} elsif ($opt =~ m/^hostpci\d+$/) {
my $device = PVE::JSONSchema::parse_property_string('pve-qm-hostpci', $conf->{$opt});
if ($device->{host}) {
die "only root can set '$opt' config for non-mapped devices\n" if $user ne 'root@pam';
} elsif ($device->{mapping}) {
$rpcenv->check_full($user, "/mapping/pci/$device->{mapping}", ['Mapping.Use']);
} else {
die "either 'host' or 'mapping' must be set.\n";
}
} }
} }
}; };

View File

@ -4,6 +4,7 @@ use warnings;
use strict; use strict;
use PVE::JSONSchema; use PVE::JSONSchema;
use PVE::Mapping::PCI;
use PVE::SysFSTools; use PVE::SysFSTools;
use PVE::Tools; use PVE::Tools;
@ -22,6 +23,7 @@ my $PCIRE = qr/(?:[a-f0-9]{4,}:)?[a-f0-9]{2}:[a-f0-9]{2}(?:\.[a-f0-9])?/;
my $hostpci_fmt = { my $hostpci_fmt = {
host => { host => {
default_key => 1, default_key => 1,
optional => 1,
type => 'string', type => 'string',
pattern => qr/$PCIRE(;$PCIRE)*/, pattern => qr/$PCIRE(;$PCIRE)*/,
format_description => 'HOSTPCIID[;HOSTPCIID2...]', format_description => 'HOSTPCIID[;HOSTPCIID2...]',
@ -32,8 +34,18 @@ of PCI virtual functions of the host. HOSTPCIID syntax is:
'bus:dev.func' (hexadecimal numbers) 'bus:dev.func' (hexadecimal numbers)
You can us the 'lspci' command to list existing PCI devices. You can us the 'lspci' command to list existing PCI devices.
Either this or the 'mapping' key must be set.
EODESCR EODESCR
}, },
mapping => {
optional => 1,
type => 'string',
format_description => 'mapping-id',
format => 'pve-configid',
description => "The ID of a cluster wide mapping. Either this or the default-key 'host'"
." must be set.",
},
rombar => { rombar => {
type => 'boolean', type => 'boolean',
description => "Specify whether or not the device's ROM will be visible in the" description => "Specify whether or not the device's ROM will be visible in the"
@ -376,6 +388,32 @@ sub print_pcie_root_port {
return $res; return $res;
} }
# returns the parsed pci config but parses the 'host' part into
# a list of lists in the 'ids' property like this:
#
# {
# mdev => 1,
# rombar => ...
# ...
# ids => [
# # this contains a list of alternative devices,
# [
# # which are itself lists of ids for one multifunction device
# {
# id => "0000:00:00.0",
# vendor => "...",
# },
# {
# id => "0000:00:00.1",
# vendor => "...",
# },
# ],
# [
# ...
# ],
# ...
# ],
# }
sub parse_hostpci { sub parse_hostpci {
my ($value) = @_; my ($value) = @_;
@ -383,50 +421,69 @@ sub parse_hostpci {
my $res = PVE::JSONSchema::parse_property_string($hostpci_fmt, $value); my $res = PVE::JSONSchema::parse_property_string($hostpci_fmt, $value);
my @idlist = split(/;/, $res->{host}); my $alternatives = [];
delete $res->{host}; my $host = delete $res->{host};
foreach my $id (@idlist) { my $mapping = delete $res->{mapping};
my $devs = PVE::SysFSTools::lspci($id);
die "no PCI device found for '$id'\n" if !scalar(@$devs); die "Cannot set both 'host' and 'mapping'.\n" if defined($host) && defined($mapping);
push @{$res->{pciid}}, @$devs;
if ($mapping) {
# we have no ordinary pci id, must be a mapping
my $devices = PVE::Mapping::PCI::find_on_current_node($mapping);
die "PCI device mapping not found for '$mapping'\n" if !$devices || !scalar($devices->@*);
for my $device ($devices->@*) {
eval { PVE::Mapping::PCI::assert_valid($mapping, $device) };
die "PCI device mapping invalid (hardware probably changed): $@\n" if $@;
push $alternatives->@*, [split(/;/, $device->{path})];
}
} elsif ($host) {
push $alternatives->@*, [split(/;/, $host)];
} else {
die "Either 'host' or 'mapping' must be set.\n";
} }
$res->{ids} = [];
for my $alternative ($alternatives->@*) {
my $ids = [];
foreach my $id ($alternative->@*) {
my $devs = PVE::SysFSTools::lspci($id);
die "no PCI device found for '$id'\n" if !scalar($devs->@*);
push $ids->@*, @$devs;
}
if (scalar($ids->@*) > 1) {
$res->{'has-multifunction'} = 1;
die "cannot use mediated device with multifunction device\n" if $res->{mdev};
}
push $res->{ids}->@*, $ids;
}
return $res; return $res;
} }
sub print_hostpci_devices { # parses all hostpci devices from a config and does some sanity checks
my ($vmid, $conf, $devices, $vga, $winversion, $q35, $bridges, $arch, $machine_type, $bootorder) = @_; # returns a hash like this:
# {
# hostpci0 => {
# # hash from parse_hostpci function
# },
# hostpci1 => { ... },
# ...
# }
sub parse_hostpci_devices {
my ($conf) = @_;
my $kvm_off = 0; my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
my $gpu_passthrough = 0;
my $legacy_igd = 0; my $legacy_igd = 0;
my $pciaddr; my $parsed_devices = {};
for (my $i = 0; $i < $MAX_HOSTPCI_DEVICES; $i++) { for (my $i = 0; $i < $MAX_HOSTPCI_DEVICES; $i++) {
my $id = "hostpci$i"; my $id = "hostpci$i";
my $d = parse_hostpci($conf->{$id}); my $d = parse_hostpci($conf->{$id});
next if !$d; next if !$d;
if (my $pcie = $d->{pcie}) { # check syntax
die "q35 machine model is not enabled" if !$q35; die "q35 machine model is not enabled" if !$q35 && $d->{pcie};
# win7 wants to have the pcie devices directly on the pcie bus
# instead of in the root port
if ($winversion == 7) {
$pciaddr = print_pcie_addr("${id}bus0");
} else {
# add more root ports if needed, 4 are present by default
# by pve-q35 cfgs, rest added here on demand.
if ($i > 3) {
push @$devices, '-device', print_pcie_root_port($i);
}
$pciaddr = print_pcie_addr($id);
}
} else {
my $pci_name = $d->{'legacy-igd'} ? 'legacy-igd' : $id;
$pciaddr = print_pci_addr($pci_name, $bridges, $arch, $machine_type);
}
my $pcidevices = $d->{pciid};
my $multifunction = @$pcidevices > 1;
if ($d->{'legacy-igd'}) { if ($d->{'legacy-igd'}) {
die "only one device can be assigned in legacy-igd mode\n" die "only one device can be assigned in legacy-igd mode\n"
@ -444,11 +501,113 @@ sub print_hostpci_devices {
die "legacy IGD assignment is not compatible with q35\n" die "legacy IGD assignment is not compatible with q35\n"
if $q35; if $q35;
die "legacy IGD assignment is not compatible with multifunction devices\n" die "legacy IGD assignment is not compatible with multifunction devices\n"
if $multifunction; if $d->{'has-multifunction'};
die "legacy IGD assignment is not compatible with alternate devices\n"
if scalar($d->{ids}->@*) > 1;
# check first device for valid id
die "legacy IGD assignment only works for devices on host bus 00:02.0\n" die "legacy IGD assignment only works for devices on host bus 00:02.0\n"
if $pcidevices->[0]->{id} !~ m/02\.0$/; if $d->{ids}->[0]->[0]->{id} !~ m/02\.0$/;
} }
$parsed_devices->{$id} = $d;
}
return $parsed_devices;
}
# Picks the concrete host devices for every configured hostpci slot.
#
# $devices is the hash returned by parse_hostpci_devices (keys 'hostpciN');
# on entry each entry's 'ids' is a list of alternatives, and each alternative
# is a list of device hashes carrying an 'id'. $vmid is handed to
# reserve_pci_usage.
#
# The entries are modified in place:
# * mdev entries: 'ids' becomes a flat list holding the first device of every
#   alternative; the actual choice happens later when the mediated device is
#   allocated, so nothing is reserved or marked as used here.
# * entries with exactly one alternative: 'ids' becomes that alternative. It
#   is only recorded as used, no reservation is attempted at this point.
# * entries with several alternatives: the first alternative that shares no
#   id with an already chosen one and for which reserve_pci_usage (called
#   with a timeout argument of 10) does not die is taken.
#
# Dies if an id would be assigned twice or if no alternative is free for a
# slot. Returns the same $devices reference.
my sub choose_hostpci_devices {
    my ($devices, $vmid) = @_;

    my %claimed; # pci id => 1 for every device picked so far

    # marks all functions of the given alternative as taken
    my $claim = sub {
        my ($functions) = @_;
        for my $function ($functions->@*) {
            my $pci_id = $function->{id};
            die "device '$pci_id' assigned more than once\n" if $claimed{$pci_id};
            $claimed{$pci_id} = 1;
        }
    };

    SLOT: for (my $slot = 0; $slot < $MAX_HOSTPCI_DEVICES; $slot++) {
        my $entry = $devices->{"hostpci$slot"} or next SLOT;
        my $alternatives = $entry->{ids};

        if ($entry->{mdev}) {
            # keep every alternative, but drop the inner list level
            $entry->{ids} = [ map { $_->[0] } $alternatives->@* ];
            next SLOT;
        }

        if (scalar($alternatives->@*) == 1) {
            # nothing to choose from, take the single alternative
            my $only = $alternatives->[0];
            $entry->{ids} = $only;
            $claim->($only);
            next SLOT;
        }

        for my $candidate ($alternatives->@*) {
            my @candidate_ids = map { $_->{id} } $candidate->@*;

            # skip alternatives overlapping with an earlier slot's choice
            next if grep { defined($claimed{$_}) } @candidate_ids;

            # a failing reservation just means: try the next alternative
            eval { reserve_pci_usage(\@candidate_ids, $vmid, 10, undef) };
            next if $@;

            # found one that is neither used nor reserved
            $claim->($candidate);
            $entry->{ids} = $candidate;
            next SLOT;
        }

        die "could not find a free device for 'hostpci$slot'\n";
    }

    return $devices;
}
sub print_hostpci_devices {
my ($vmid, $conf, $devices, $vga, $winversion, $bridges, $arch, $machine_type, $bootorder) = @_;
my $kvm_off = 0;
my $gpu_passthrough = 0;
my $legacy_igd = 0;
my $pciaddr;
my $pci_devices = choose_hostpci_devices(parse_hostpci_devices($conf), $vmid);
for (my $i = 0; $i < $MAX_HOSTPCI_DEVICES; $i++) {
my $id = "hostpci$i";
my $d = $pci_devices->{$id};
next if !$d;
$legacy_igd = 1 if $d->{'legacy-igd'};
if (my $pcie = $d->{pcie}) {
# win7 wants to have the pcie devices directly on the pcie bus
# instead of in the root port
if ($winversion == 7) {
$pciaddr = print_pcie_addr("${id}bus0");
} else {
# add more root ports if needed, 4 are present by default
# by pve-q35 cfgs, rest added here on demand.
if ($i > 3) {
push @$devices, '-device', print_pcie_root_port($i);
}
$pciaddr = print_pcie_addr($id);
}
} else {
my $pci_name = $d->{'legacy-igd'} ? 'legacy-igd' : $id;
$pciaddr = print_pci_addr($pci_name, $bridges, $arch, $machine_type);
}
my $num_devices = scalar($d->{ids}->@*);
my $multifunction = $num_devices > 1 && !$d->{mdev};
my $xvga = ''; my $xvga = '';
if ($d->{'x-vga'}) { if ($d->{'x-vga'}) {
$xvga = ',x-vga=on' if !($conf->{bios} && $conf->{bios} eq 'ovmf'); $xvga = ',x-vga=on' if !($conf->{bios} && $conf->{bios} eq 'ovmf');
@ -458,15 +617,13 @@ sub print_hostpci_devices {
} }
my $sysfspath; my $sysfspath;
if ($d->{mdev} && scalar(@$pcidevices) == 1) { if ($d->{mdev}) {
my $uuid = generate_mdev_uuid($vmid, $i); my $uuid = generate_mdev_uuid($vmid, $i);
$sysfspath = "/sys/bus/mdev/devices/$uuid"; $sysfspath = "/sys/bus/mdev/devices/$uuid";
} elsif ($d->{mdev}) {
warn "ignoring mediated device '$id' with multifunction device\n";
} }
my $j = 0; for (my $j = 0; $j < $num_devices; $j++) {
foreach my $pcidevice (@$pcidevices) { my $pcidevice = $d->{ids}->[$j];
my $devicestr = "vfio-pci"; my $devicestr = "vfio-pci";
if ($sysfspath) { if ($sysfspath) {
@ -489,12 +646,13 @@ sub print_hostpci_devices {
} }
} }
push @$devices, '-device', $devicestr; push @$devices, '-device', $devicestr;
$j++; last if $d->{mdev};
} }
} }
return ($kvm_off, $gpu_passthrough, $legacy_igd); return ($kvm_off, $gpu_passthrough, $legacy_igd, $pci_devices);
} }
sub prepare_pci_device { sub prepare_pci_device {
@ -596,6 +754,26 @@ sub reserve_pci_usage {
warn "leftover PCI reservation found for $id, lets take it...\n"; warn "leftover PCI reservation found for $id, lets take it...\n";
} }
} }
} elsif ($reservation) {
# already reserved by the same vmid
if (my $reserved_time = $reservation->{time}) {
if (defined($timeout)) {
# use the longer timeout
my $old_timeout = $reservation->{time} - 5 - $ctime;
$timeout = $old_timeout if $old_timeout > $timeout;
}
} elsif (my $reserved_pid = $reservation->{pid}) {
my $running_pid = PVE::QemuServer::Helpers::vm_running_locally($reservation->{vmid});
if (defined($running_pid) && $running_pid == $reservation->{pid}) {
if (defined($pid)) {
die "PCI device '$id' already in use by running VMID '$reservation->{vmid}'\n";
} elsif (defined($timeout)) {
# ignore timeout reservation for running vms, can happen with e.g.
# qm showcmd
return;
}
}
}
} }
$reservation_list->{$id} = { vmid => $vmid }; $reservation_list->{$id} = { vmid => $vmid };

View File

@ -8,7 +8,7 @@ hostpci1: d0:13.0,pcie=1
hostpci2: 00:f4.0 hostpci2: 00:f4.0
hostpci3: d0:15.1,pcie=1 hostpci3: d0:15.1,pcie=1
hostpci4: d0:17.0,pcie=1,rombar=0 hostpci4: d0:17.0,pcie=1,rombar=0
hostpci7: d0:15.1,pcie=1 hostpci7: d0:15.2,pcie=1
machine: q35 machine: q35
memory: 512 memory: 512
net0: virtio=2E:01:68:F9:9C:87,bridge=vmbr0 net0: virtio=2E:01:68:F9:9C:87,bridge=vmbr0

View File

@ -32,7 +32,7 @@
-device 'pcie-root-port,id=ich9-pcie-port-5,addr=10.0,x-speed=16,x-width=32,multifunction=on,bus=pcie.0,port=5,chassis=5' \ -device 'pcie-root-port,id=ich9-pcie-port-5,addr=10.0,x-speed=16,x-width=32,multifunction=on,bus=pcie.0,port=5,chassis=5' \
-device 'vfio-pci,host=0000:d0:17.0,id=hostpci4,bus=ich9-pcie-port-5,addr=0x0,rombar=0' \ -device 'vfio-pci,host=0000:d0:17.0,id=hostpci4,bus=ich9-pcie-port-5,addr=0x0,rombar=0' \
-device 'pcie-root-port,id=ich9-pcie-port-8,addr=10.3,x-speed=16,x-width=32,multifunction=on,bus=pcie.0,port=8,chassis=8' \ -device 'pcie-root-port,id=ich9-pcie-port-8,addr=10.3,x-speed=16,x-width=32,multifunction=on,bus=pcie.0,port=8,chassis=8' \
-device 'vfio-pci,host=0000:d0:15.1,id=hostpci7,bus=ich9-pcie-port-8,addr=0x0' \ -device 'vfio-pci,host=0000:d0:15.2,id=hostpci7,bus=ich9-pcie-port-8,addr=0x0' \
-device 'VGA,id=vga,bus=pcie.0,addr=0x1' \ -device 'VGA,id=vga,bus=pcie.0,addr=0x1' \
-device 'virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3,free-page-reporting=on' \ -device 'virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3,free-page-reporting=on' \
-iscsi 'initiator-name=iqn.1993-08.org.debian:01:aabbccddeeff' \ -iscsi 'initiator-name=iqn.1993-08.org.debian:01:aabbccddeeff' \

View File

@ -81,6 +81,7 @@ my $pci_devs = [
"0000:0f:f2.0", "0000:0f:f2.0",
"0000:d0:13.0", "0000:d0:13.0",
"0000:d0:15.1", "0000:d0:15.1",
"0000:d0:15.2",
"0000:d0:17.0", "0000:d0:17.0",
"0000:f0:42.0", "0000:f0:42.0",
"0000:f0:43.0", "0000:f0:43.0",