1
0
mirror of https://github.com/OpenNebula/one.git synced 2025-01-08 21:17:43 +03:00

F #5940: Improve PCI Passthrough and SRIOV support

- SHORT_ADDRESS can be used to select specific devices  (useful on
  homogenous clusters or nic attach operations). This name has been
  selected because of:
    1. It is the attribute shown in host info
    2. It does not collide with the reserved ADDRESS attribute

- New test and add functions that consider both allocation methods: by
  name (VENDOR/CLASS/DEVICE) or address (SHORT_ADDRESS)

- Parameter check on VM creation

- revert and add method use the same pci_attribute function to add info
  to the VM PCI attribute

- Remove well-known attributes when parsing PCI devices (ADDRESS,
  PREV_ADDRES, BUS, FUNCTION, SLOT, NUMA_NODE, UUID)

- Support for attach and detach NIC with PCI attributes
    * onevm_exec.rb looks for PCI devices for ATTACH=YES when attaching/detaching an interface
    * script actions are now written in Ruby
    * KVM module with common actions (hostdev/interface device str)
    * Minor changes in xmlparser and OpenNebulaVM classes

- PCI selection options to onevm nic-attach:
    * pci short_address
    * pci_device device ID
    * pci_vendor vendor ID
    * pci_class class ID

- VF can be configured by setting some parameters through IP link (e.g.
  MAC or VLAN_ID). This commit includes a mixin to activate_vf

    * one_vmm_exec.rb looks for PCI VN_MAD drivers
    * VM class (VNM) adds a @pcis array
    * activate_vf should be called in the pre stage. The following drivers
      activate VFs (VLAN_ID is implemented as 802.1Q tag)
            - 802.1Q
            - bridge
            - fw
            - ovswitch
    * Supported attributes for VF:
      - MAC
      - VLAN_ID
      - spoof checking (SPOOFCHK)
      - trust (TRUST)

- Predictable PCI addresses for guests is only enabled if PCI bus 1
  is present (PCI bridge bus cannot be hotplugged)

- Improve integration with Libvirt/QEMU:

    * When attach, only activate the VF being attached
    * Attach: Use <interface> and not <hostdev> for VF. There seems to be a race
      condition between accessing the vfio device and permission setup.
    * Attach: Remove address on attach as it may fail because PCI controller
      is not present, e.g.:

        ATTACHNIC: Could not attach NIC to 28534240: error: Failed to attach device
        from /dev/fd/63 error: XML error: Invalid PCI address 0000:01:01.0.
        Only PCI buses up to 0 are available ExitCode: 1

    * Detach: Detach always uses <hostdev> as libvirt fails to identify the
      device just by address when using <interface>

- Sunstone Support: The "Hardware" profile of a network interface (NIC) of a VM can be of three types, now exposed in Sunstone:

    * "Emulated" it includes the hardware model emulated by Qemu
    * "PCI - Automatic" oned hw scheduler will pick the best PCI device for the NIC
    * "PCI - Manual" user can specify the PCI device by its short-address as shown in host information

This commit also enables the attach/detach operations on PCI based NICs in Sunstone. *Note*: only for KVM VMs

(reverts commit aafd5f3014)
(reverts commit 671ef0b2d91aba00b1e6c63e19c4a50c3af20f6f)
This commit is contained in:
Ruben S. Montero 2022-10-04 20:16:09 +02:00
parent a411993e5e
commit 74bc751bd7
No known key found for this signature in database
GPG Key ID: A0CEA6FA880A1D87
12 changed files with 100 additions and 30 deletions

View File

@ -78,7 +78,7 @@ public:
* @param devs list of requested PCI devices, will include address of
* assigned devices.
* @param vmid of the VM
*
*
* @return true if the devices where added
*
* NOTE THIS FUNCTION DOES NOT PERFORM ANY ROLLBACK
@ -117,7 +117,7 @@ public:
/**
* Gets a 4 hex digits value from attribute
* @param name of the attribute
* @pci_device VectorAttribute representing the device
* @param pci_device VectorAttribute representing the device
* @return the 0 if not found, -1 syntax error, >0 valid hex value
*/
static int get_pci_value(const char * name,

View File

@ -1414,6 +1414,12 @@ public:
// ------------------------------------------------------------------------
// NIC Hotplug related functions
// ------------------------------------------------------------------------
/**
* Checks the attributes of a PCI device
*/
int check_pci_attributes(VectorAttribute * pci, std::string& err);
/**
* Generate and attach a new NIC attribute to the VM. This method check
* that the NIC is compatible with the VM cluster allocation and fills SG

View File

@ -114,7 +114,7 @@ public:
void to_xml_short(std::ostringstream& oss) const;
/**
* Check is a nic is alias or not
* Check if a nic is alias or not
*/
bool is_alias() const
{
@ -122,7 +122,7 @@ public:
}
/**
* Check is a nic is a PCI
* Check if a nic is a PCI
*/
bool is_pci() const
{

View File

@ -1116,6 +1116,8 @@ INHERIT_VNET_ATTR = "VXLAN_TEP"
INHERIT_VNET_ATTR = "VXLAN_MC"
INHERIT_VNET_ATTR = "CVLANS"
INHERIT_VNET_ATTR = "QINQ_TYPE"
INHERIT_VNET_ATTR = "TRUST"
INHERIT_VNET_ATTR = "SPOOFCHK"
INHERIT_VNET_ATTR = "VCENTER_NET_REF"
INHERIT_VNET_ATTR = "VCENTER_SWITCH_NAME"

View File

@ -144,14 +144,20 @@ bool HostSharePCI::test(const vector<VectorAttribute *> &devs) const
{
std::set<string> assigned;
std::set<const VectorAttribute *> tested;
unsigned int vendor_id, device_id, class_id;
// Test for "SHORT_ADDRESS" PCI selectio
// Test for "SHORT_ADDRESS" PCI selection
// and pre-allocated these first
for (const auto& device : devs)
{
string short_addr = device->vector_value("SHORT_ADDRESS");
if (short_addr.empty())
// Be aware of special case after migration, when
// !short_addr.empty() and at least one of vendor/device/class is set
if (short_addr.empty() ||
get_pci_value("VENDOR", device, vendor_id) > 0 ||
get_pci_value("DEVICE", device, device_id) > 0 ||
get_pci_value("CLASS" , device, class_id) > 0)
{
continue;
}
@ -301,12 +307,16 @@ bool HostSharePCI::add_by_name(VectorAttribute *device, int vmid)
bool HostSharePCI::add(vector<VectorAttribute *> &devs, int vmid)
{
std::set<VectorAttribute *> added;
unsigned int vendor_id, device_id, class_id;
for (auto& device : devs)
{
string short_addr = device->vector_value("SHORT_ADDRESS");
if (short_addr.empty())
if (short_addr.empty() ||
get_pci_value("VENDOR", device, vendor_id) > 0 ||
get_pci_value("DEVICE", device, device_id) > 0 ||
get_pci_value("CLASS" , device, class_id) > 0)
{
continue;
}

View File

@ -2546,6 +2546,8 @@ Request::ErrorCode VirtualMachineAttachNic::request_execute(int id,
// -------------------------------------------------------------------------
// Authorize the operation, restricted attributes & check quotas
// -------------------------------------------------------------------------
VectorAttribute * pci = tmpl.get("PCI");
if (auto vm = vmpool->get_ro(id))
{
vm->get_permissions(vm_perms);
@ -2554,6 +2556,11 @@ Request::ErrorCode VirtualMachineAttachNic::request_execute(int id,
{
hid = vm->get_hid();
}
if (pci != nullptr && vm->check_pci_attributes(pci, att.resp_msg) != 0)
{
return ACTION;
}
}
else
{
@ -2596,8 +2603,6 @@ Request::ErrorCode VirtualMachineAttachNic::request_execute(int id,
// -------------------------------------------------------------------------
// PCI test and set
// -------------------------------------------------------------------------
VectorAttribute * pci = tmpl.get("PCI");
HostShareCapacity sr;
if ( pci != nullptr && hid != -1 )

View File

@ -3413,7 +3413,7 @@ int VirtualMachine::set_up_attach_nic(VirtualMachineTemplate * tmpl, string& err
if ( is_pci )
{
Nebula& nd = Nebula::instance();
string default_bus;
string bus;
std::vector<const VectorAttribute*> pcis;
@ -3435,9 +3435,9 @@ int VirtualMachine::set_up_attach_nic(VirtualMachineTemplate * tmpl, string& err
_new_nic->replace("PCI_ID", max_pci_id + 1);
nd.get_configuration_attribute("PCI_PASSTHROUGH_BUS", default_bus);
nd.get_configuration_attribute("PCI_PASSTHROUGH_BUS", bus);
if ( HostSharePCI::set_pci_address(_new_nic.get(), default_bus, false) != 0 )
if ( HostSharePCI::set_pci_address(_new_nic.get(), bus, false) != 0 )
{
err = "Wrong BUS in PCI attribute";
return -1;

View File

@ -315,8 +315,7 @@ int VirtualMachine::parse_vrouter(string& error_str, Template * tmpl)
/* -------------------------------------------------------------------------- */
/* -------------------------------------------------------------------------- */
static int check_pci_attributes(VectorAttribute * pci, const string& default_bus,
string& error_str)
int VirtualMachine::check_pci_attributes(VectorAttribute * pci, string& error_str)
{
static std::vector<std::string> attrs = {"VENDOR", "DEVICE", "CLASS"};
bool found = false;
@ -383,10 +382,16 @@ int VirtualMachine::parse_pci(string& error_str, Template * tmpl)
for (auto& attr : array_pci)
{
if ( check_pci_attributes(attr, default_bus, error_str) != 0 )
if ( check_pci_attributes(attr, error_str) != 0 )
{
return -1;
}
if ( HostSharePCI::set_pci_address(attr, default_bus, true) != 0 )
{
error_str = "Wrong BUS in PCI attribute";
return -1;
}
}
return 0;

View File

@ -65,7 +65,8 @@ domain = ARGV[0]
vm = KvmVM.new(STDIN.read)
if vm.pci_attach?
dev_xml = vm.hostdev_xml
pci1 = vm.dumpxml_regexp(domain, "<alias name='pci.1'/>")
dev_xml = vm.hostdev_xml(:pci => pci1)
else
dev_xml = vm.interface_xml
end

View File

@ -87,7 +87,7 @@ def detach_nic(dom, mac)
end
def detach_pci(dom, vm)
dev_xml = vm.hostdev_xml(true)
dev_xml = vm.hostdev_xml(:force_hostdev => true)
cmd =<<~EOS
#{virsh} detach-device #{dom} <(
@ -105,6 +105,7 @@ def detach_pci(dom, vm)
rc
end
# ------------------------------------------------------------------------------
# ------------------------------------------------------------------------------

View File

@ -119,7 +119,7 @@ module VirtualMachineManagerKVM
dev << '<virtualport type="openvswitch"/>'
end
dev << xputs('<source bridge=%s/>', 'BRIDGE')
dev << xputs("<source bridge=%s/>", 'BRIDGE')
else
dev = '<interface type="ethernet">'
end
@ -144,10 +144,10 @@ module VirtualMachineManagerKVM
end
inb_keys = %w[INBOUND_AVG_BW INBOUND_PEAK_BW INBOUND_PEAK_KB]
inbound = inb_keys.any? {|e| exist? e }
inbound = inb_keys.any? {|e| exist? e }
outb_keys = %w[OUTBOUND_AVG_BW OUTBOUND_PEAK_BW OUTBOUND_PEAK_KB]
outbound = outb_keys.any? {|e| exist? e }
outbound = outb_keys.any? {|e| exist? e }
if inbound || outbound
dev << '<bandwidth>'
@ -191,6 +191,18 @@ module VirtualMachineManagerKVM
!out.match(regexp).nil?
end
# Runs `virsh dumpxml` for the given domain and reports whether the
# command's standard output matches a regular expression.
#
# @param domain  [String] domain identifier appended to the virsh command
# @param str_exp [String] source of the regular expression to look for
# @return [Boolean] true when the output matches; false when it does not
#   match or when the command produced no output
def dumpxml_regexp(domain, str_exp)
    # Only stdout is of interest; stderr and the exit status are discarded
    stdout, = Open3.capture3("#{virsh} dumpxml #{domain}")

    if stdout.nil? || stdout.empty?
        false
    else
        # The pattern is compiled only once there is something to match
        stdout.match(Regexp.new(str_exp)) ? true : false
    end
end
#-----------------------------------------------------------------------
# This function generates a XML document to attach a new device
# to the VM. The specification supports the same OpenNebula attributes.
@ -198,20 +210,24 @@ module VirtualMachineManagerKVM
# Example:
#
# <hostdev mode='subsystem' type='pci' managed='yes'>
# <source>
# <address domain='0x0000' bus='0x05' slot='0x02' function='0x0'/>
# </source>
# <address type='pci' domain='0x0' bus='0x01'
# slot='0x01' function='0'/>
# <source>
# <address domain='0x0000' bus='0x05' slot='0x02' function='0x0'/>
# </source>
# <address type='pci' domain='0x0' bus='0x01' slot='0x01' function='0'/>
# </hostdev>
#
# NOTE: Libvirt/QEMU seems to have a race condition accessing vfio device
# and the permission check/set that makes <hostdev> not work for VF.
#
# NOTE: On detach (as we are managing MAC/VLAN through ip link vf)
# devices needs to use <hostdev> format
# NOTE: On detach (as we are managing MAC/VLAN through ip link vf) devices
# need to use <hostdev> format
#-----------------------------------------------------------------------
def hostdev_xml(force_hostdev = false)
def hostdev_xml(_opts = {})
opts = {
:force_hostdev => false,
:pci => false
}.merge(_opts)
prefix_old = @xpath_prefix
@xpath_prefix = "TEMPLATE/PCI[ATTACH='YES']/"
@ -220,7 +236,7 @@ module VirtualMachineManagerKVM
dev << xputs('<source><address uuid=%s/></source>', 'UUID')
dev << '</hostdev>'
else
if force_hostdev
if opts[:force_hostdev]
is_vf = false
else
is_vf = vf?(@xml["#{@xpath_prefix}SHORT_ADDRESS"])
@ -241,6 +257,22 @@ module VirtualMachineManagerKVM
dev << xputs(' slot=%s', 'SLOT', :hex => true)
dev << xputs(' function=%s', 'FUNCTION', :hex => true)
dev << '/></source>'
#Setting Bus address needs to check that a PCI contoller is
#present for Bus 1
vm_addr = %w[VM_DOMAIN VM_BUS VM_SLOT VM_FUNCTION].all? {|e|
exist? e
}
if vm_addr && opts[:pci]
dev << '<address type="pci"'
dev << xputs(' domain=%s', 'VM_DOMAIN')
dev << xputs(' bus=%s', 'VM_BUS')
dev << xputs(' slot=%s', 'VM_SLOT')
dev << xputs(' function=%s', 'VM_FUNCTION')
dev << '/>'
end
dev << dev_end
end

View File

@ -87,7 +87,6 @@ module VNMMAD::VirtualFunction
# virtfn /sys/devices/pci0000:80/0000:80:03.2/0000:85:00.0/virtfn3
# _vf /sys/devices/pci0000:80/0000:80:03.2/0000:85:02.3
# rubocop:enable Layout/LineLength
m = virtfn.match(/virtfn([0-9]+)/)
next if m.nil?
@ -102,6 +101,8 @@ module VNMMAD::VirtualFunction
cmd = "#{command(:ip)} link set #{pf_dev} vf #{m[1]}"
cmd << " mac #{pci[:mac]}" if pci[:mac]
cmd << " vlan #{pci[:vlan_id]}" if pci[:vlan_id]
cmd << " spoofchk #{on_off(pci[:spoofchk])}" if pci[:spoofchk]
cmd << " trust #{on_off(pci[:trust])}" if pci[:trust]
OpenNebula.exec_and_log(cmd)
end
@ -109,5 +110,12 @@ module VNMMAD::VirtualFunction
# rubocop:enable Style/CombinableLoops
end
# Translates a yes/on style option value into the "on"/"off" keyword.
#
# The whole value (ignoring surrounding whitespace) must be "yes" or "on",
# case-insensitively, to yield "on". Anything else yields "off", including
# nil, non-String values and multi-line strings that merely contain a
# "yes"/"on" line.
#
# @param option [#to_s, nil] raw option value
# @return [String] "on" or "off"
def on_off(option)
    # \A and \z anchor the whole string; ^ and $ would match on any line
    if /\A(?:yes|on)\z/i.match(option.to_s.strip)
        'on'
    else
        'off'
    end
end
end
# rubocop:enable Style/ClassAndModuleChildren