mirror of
https://github.com/OpenNebula/one.git
synced 2025-01-08 21:17:43 +03:00
F #5940: Improve PCI Passthrough and SRIOV support
- SHORT_ADDRESS can be used to select specific devices (useful on
homogeneous clusters or nic attach operations). This name has been
selected because of:
1. It is the attribute shown in host info
2. It does not collide with the reserved ADDRESS attribute
- New test and add functions that consider both allocation methods: by
name (VENDOR/CLASS/DEVICE) or address (SHORT_ADDRESS)
- Parameter check on VM creation
- revert and add method use the same pci_attribute function to add info
to the VM PCI attribute
- Remove well-known attributes when parsing PCI devices (ADDRESS,
PREV_ADDRES, BUS, FUNCTION, SLOT, NUMA_NODE, UUID)
- Support for attach and detach NIC with PCI attributes
* one_vmm_exec.rb looks for PCI devices for ATTACH=YES when attaching/detaching an interface
* script actions are now written in Ruby
* KVM module with common actions (hostdev/interface device str)
* Minor changes in xmlparser and OpenNebulaVM classes
- PCI selection options to onevm nic-attach:
* pci short_address
* pci_device device ID
* pci_vendor vendor ID
* pci_class class ID
- VF can be configured by setting some parameters through IP link (e.g.
MAC or VLAN_ID). This commit includes a mixin to activate_vf
* one_vmm_exec.rb looks for PCI VN_MAD drivers
* VM class (VNM) adds a @pcis array
* activate_vf should be called in the pre stage. The following drivers
activate VFs (VLAN_ID is implemented as 802.1Q tag)
- 802.1Q
- bridge
- fw
- ovswitch
* Supported attributes for VF:
- MAC
- VLAN_ID
- spoof checking (SPOOFCHK)
- trust (TRUST)
- Predictable PCI addresses for guests are only enabled if PCI bus 1
is present (PCI bridge bus cannot be hotplugged)
- Improve integration with Libvirt/QEMU:
* When attach, only activate the VF being attached
* Attach: Use <interface> and not <hostdev> for VF. There seems to be a race
condition between accessing the vfio device and permission setup.
* Attach: Remove address on attach as it may fail because PCI controller
is not present, e.g.:
ATTACHNIC: Could not attach NIC to 28534240: error: Failed to attach device
from /dev/fd/63 error: XML error: Invalid PCI address 0000:01:01.0.
Only PCI buses up to 0 are available ExitCode: 1
* Detach: Detach always uses <hostdev> as libvirt fails to identify the
device just by address when using <interface>
- Sunstone Support: The "Hardware" profile of a network interface (NIC) of a VM can be of three types, now exposed in Sunstone:
* "Emulated" it includes the hardware model emulated by Qemu
* "PCI - Automatic" oned hw scheduler will pick the best PCI device for the NIC
* "PCI - Manual" user can specify the PCI device by its short-address as shown in host information
This commit also enables the attach/detach operations on PCI based NICs in Sunstone. *Note*: only for KVM VMs
(reverts commit aafd5f3014
)
(reverts commit 671ef0b2d91aba00b1e6c63e19c4a50c3af20f6f)
This commit is contained in:
parent
a411993e5e
commit
74bc751bd7
@ -78,7 +78,7 @@ public:
|
||||
* @param devs list of requested PCI devices, will include address of
|
||||
* assigned devices.
|
||||
* @param vmid of the VM
|
||||
*
|
||||
*
|
||||
* @return true if the devices where added
|
||||
*
|
||||
* NOTE THIS FUNCTION DOES NOT PERFORM ANY ROLLBACK
|
||||
@ -117,7 +117,7 @@ public:
|
||||
/**
|
||||
* Gets a 4 hex digits value from attribute
|
||||
* @param name of the attribute
|
||||
* @pci_device VectorAttribute representing the device
|
||||
* @param pci_device VectorAttribute representing the device
|
||||
* @return the 0 if not found, -1 syntax error, >0 valid hex value
|
||||
*/
|
||||
static int get_pci_value(const char * name,
|
||||
|
@ -1414,6 +1414,12 @@ public:
|
||||
// ------------------------------------------------------------------------
|
||||
// NIC Hotplug related functions
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Checks the attributes of a PCI device
|
||||
*/
|
||||
int check_pci_attributes(VectorAttribute * pci, std::string& err);
|
||||
|
||||
/**
|
||||
* Generate and attach a new NIC attribute to the VM. This method check
|
||||
* that the NIC is compatible with the VM cluster allocation and fills SG
|
||||
|
@ -114,7 +114,7 @@ public:
|
||||
void to_xml_short(std::ostringstream& oss) const;
|
||||
|
||||
/**
|
||||
* Check is a nic is alias or not
|
||||
* Check if a nic is alias or not
|
||||
*/
|
||||
bool is_alias() const
|
||||
{
|
||||
@ -122,7 +122,7 @@ public:
|
||||
}
|
||||
|
||||
/**
|
||||
* Check is a nic is a PCI
|
||||
* Check if a nic is a PCI
|
||||
*/
|
||||
bool is_pci() const
|
||||
{
|
||||
|
@ -1116,6 +1116,8 @@ INHERIT_VNET_ATTR = "VXLAN_TEP"
|
||||
INHERIT_VNET_ATTR = "VXLAN_MC"
|
||||
INHERIT_VNET_ATTR = "CVLANS"
|
||||
INHERIT_VNET_ATTR = "QINQ_TYPE"
|
||||
INHERIT_VNET_ATTR = "TRUST"
|
||||
INHERIT_VNET_ATTR = "SPOOFCHK"
|
||||
|
||||
INHERIT_VNET_ATTR = "VCENTER_NET_REF"
|
||||
INHERIT_VNET_ATTR = "VCENTER_SWITCH_NAME"
|
||||
|
@ -144,14 +144,20 @@ bool HostSharePCI::test(const vector<VectorAttribute *> &devs) const
|
||||
{
|
||||
std::set<string> assigned;
|
||||
std::set<const VectorAttribute *> tested;
|
||||
unsigned int vendor_id, device_id, class_id;
|
||||
|
||||
// Test for "SHORT_ADDRESS" PCI selectio
|
||||
// Test for "SHORT_ADDRESS" PCI selection
|
||||
// and pre-allocated these first
|
||||
for (const auto& device : devs)
|
||||
{
|
||||
string short_addr = device->vector_value("SHORT_ADDRESS");
|
||||
|
||||
if (short_addr.empty())
|
||||
// Be aware of special case after migration, when
|
||||
// !short_addr.empty() and at least one of vendor/device/class is set
|
||||
if (short_addr.empty() ||
|
||||
get_pci_value("VENDOR", device, vendor_id) > 0 ||
|
||||
get_pci_value("DEVICE", device, device_id) > 0 ||
|
||||
get_pci_value("CLASS" , device, class_id) > 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
@ -301,12 +307,16 @@ bool HostSharePCI::add_by_name(VectorAttribute *device, int vmid)
|
||||
bool HostSharePCI::add(vector<VectorAttribute *> &devs, int vmid)
|
||||
{
|
||||
std::set<VectorAttribute *> added;
|
||||
unsigned int vendor_id, device_id, class_id;
|
||||
|
||||
for (auto& device : devs)
|
||||
{
|
||||
string short_addr = device->vector_value("SHORT_ADDRESS");
|
||||
|
||||
if (short_addr.empty())
|
||||
if (short_addr.empty() ||
|
||||
get_pci_value("VENDOR", device, vendor_id) > 0 ||
|
||||
get_pci_value("DEVICE", device, device_id) > 0 ||
|
||||
get_pci_value("CLASS" , device, class_id) > 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
@ -2546,6 +2546,8 @@ Request::ErrorCode VirtualMachineAttachNic::request_execute(int id,
|
||||
// -------------------------------------------------------------------------
|
||||
// Authorize the operation, restricted attributes & check quotas
|
||||
// -------------------------------------------------------------------------
|
||||
VectorAttribute * pci = tmpl.get("PCI");
|
||||
|
||||
if (auto vm = vmpool->get_ro(id))
|
||||
{
|
||||
vm->get_permissions(vm_perms);
|
||||
@ -2554,6 +2556,11 @@ Request::ErrorCode VirtualMachineAttachNic::request_execute(int id,
|
||||
{
|
||||
hid = vm->get_hid();
|
||||
}
|
||||
|
||||
if (pci != nullptr && vm->check_pci_attributes(pci, att.resp_msg) != 0)
|
||||
{
|
||||
return ACTION;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -2596,8 +2603,6 @@ Request::ErrorCode VirtualMachineAttachNic::request_execute(int id,
|
||||
// -------------------------------------------------------------------------
|
||||
// PCI test and set
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
VectorAttribute * pci = tmpl.get("PCI");
|
||||
HostShareCapacity sr;
|
||||
|
||||
if ( pci != nullptr && hid != -1 )
|
||||
|
@ -3413,7 +3413,7 @@ int VirtualMachine::set_up_attach_nic(VirtualMachineTemplate * tmpl, string& err
|
||||
if ( is_pci )
|
||||
{
|
||||
Nebula& nd = Nebula::instance();
|
||||
string default_bus;
|
||||
string bus;
|
||||
|
||||
std::vector<const VectorAttribute*> pcis;
|
||||
|
||||
@ -3435,9 +3435,9 @@ int VirtualMachine::set_up_attach_nic(VirtualMachineTemplate * tmpl, string& err
|
||||
|
||||
_new_nic->replace("PCI_ID", max_pci_id + 1);
|
||||
|
||||
nd.get_configuration_attribute("PCI_PASSTHROUGH_BUS", default_bus);
|
||||
nd.get_configuration_attribute("PCI_PASSTHROUGH_BUS", bus);
|
||||
|
||||
if ( HostSharePCI::set_pci_address(_new_nic.get(), default_bus, false) != 0 )
|
||||
if ( HostSharePCI::set_pci_address(_new_nic.get(), bus, false) != 0 )
|
||||
{
|
||||
err = "Wrong BUS in PCI attribute";
|
||||
return -1;
|
||||
|
@ -315,8 +315,7 @@ int VirtualMachine::parse_vrouter(string& error_str, Template * tmpl)
|
||||
/* -------------------------------------------------------------------------- */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
static int check_pci_attributes(VectorAttribute * pci, const string& default_bus,
|
||||
string& error_str)
|
||||
int VirtualMachine::check_pci_attributes(VectorAttribute * pci, string& error_str)
|
||||
{
|
||||
static std::vector<std::string> attrs = {"VENDOR", "DEVICE", "CLASS"};
|
||||
bool found = false;
|
||||
@ -383,10 +382,16 @@ int VirtualMachine::parse_pci(string& error_str, Template * tmpl)
|
||||
|
||||
for (auto& attr : array_pci)
|
||||
{
|
||||
if ( check_pci_attributes(attr, default_bus, error_str) != 0 )
|
||||
if ( check_pci_attributes(attr, error_str) != 0 )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
if ( HostSharePCI::set_pci_address(attr, default_bus, true) != 0 )
|
||||
{
|
||||
error_str = "Wrong BUS in PCI attribute";
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -65,7 +65,8 @@ domain = ARGV[0]
|
||||
vm = KvmVM.new(STDIN.read)
|
||||
|
||||
if vm.pci_attach?
|
||||
dev_xml = vm.hostdev_xml
|
||||
pci1 = vm.dumpxml_regexp(domain, "<alias name='pci.1'/>")
|
||||
dev_xml = vm.hostdev_xml(:pci => pci1)
|
||||
else
|
||||
dev_xml = vm.interface_xml
|
||||
end
|
||||
|
@ -87,7 +87,7 @@ def detach_nic(dom, mac)
|
||||
end
|
||||
|
||||
def detach_pci(dom, vm)
|
||||
dev_xml = vm.hostdev_xml(true)
|
||||
dev_xml = vm.hostdev_xml(:force_hostdev => true)
|
||||
|
||||
cmd =<<~EOS
|
||||
#{virsh} detach-device #{dom} <(
|
||||
@ -105,6 +105,7 @@ def detach_pci(dom, vm)
|
||||
|
||||
rc
|
||||
end
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
|
@ -119,7 +119,7 @@ module VirtualMachineManagerKVM
|
||||
dev << '<virtualport type="openvswitch"/>'
|
||||
end
|
||||
|
||||
dev << xputs('<source bridge=%s/>', 'BRIDGE')
|
||||
dev << xputs("<source bridge=%s/>", 'BRIDGE')
|
||||
else
|
||||
dev = '<interface type="ethernet">'
|
||||
end
|
||||
@ -144,10 +144,10 @@ module VirtualMachineManagerKVM
|
||||
end
|
||||
|
||||
inb_keys = %w[INBOUND_AVG_BW INBOUND_PEAK_BW INBOUND_PEAK_KB]
|
||||
inbound = inb_keys.any? {|e| exist? e }
|
||||
inbound = inb_keys.any? {|e| exist? e }
|
||||
|
||||
outb_keys = %w[OUTBOUND_AVG_BW OUTBOUND_PEAK_BW OUTBOUND_PEAK_KB]
|
||||
outbound = outb_keys.any? {|e| exist? e }
|
||||
outbound = outb_keys.any? {|e| exist? e }
|
||||
|
||||
if inbound || outbound
|
||||
dev << '<bandwidth>'
|
||||
@ -191,6 +191,18 @@ module VirtualMachineManagerKVM
|
||||
!out.match(regexp).nil?
|
||||
end
|
||||
|
||||
def dumpxml_regexp(domain, str_exp)
|
||||
cmd = "#{virsh} dumpxml #{domain}"
|
||||
|
||||
out, _err, _rc = Open3.capture3(cmd)
|
||||
|
||||
return false if out.nil? || out.empty?
|
||||
|
||||
regexp = Regexp.new(str_exp)
|
||||
|
||||
!out.match(regexp).nil?
|
||||
end
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
# This function generates a XML document to attach a new device
|
||||
# to the VM. The specification supports the same OpenNebula attributes.
|
||||
@ -198,20 +210,24 @@ module VirtualMachineManagerKVM
|
||||
# Example:
|
||||
#
|
||||
# <hostdev mode='subsystem' type='pci' managed='yes'>
|
||||
# <source>
|
||||
# <address domain='0x0000' bus='0x05' slot='0x02' function='0x0'/>
|
||||
# </source>
|
||||
# <address type='pci' domain='0x0' bus='0x01'
|
||||
# slot='0x01' function='0'/>
|
||||
# <source>
|
||||
# <address domain='0x0000' bus='0x05' slot='0x02' function='0x0'/>
|
||||
# </source>
|
||||
# <address type='pci' domain='0x0' bus='0x01' slot='0x01' function='0'/>
|
||||
# </hostdev>
|
||||
#
|
||||
# NOTE: Libvirt/QEMU seems to have a race condition accesing vfio device
|
||||
# and the permission check/set that makes <hostdev> not work for VF.
|
||||
#
|
||||
# NOTE: On detach (as we are managing MAC/VLAN through ip link vf)
|
||||
# devices needs to use <hostdev> format
|
||||
# NOTE: On detach (as we are manging MAC/VLAN through ip link vf) devices
|
||||
# needs to use <hostdev> format
|
||||
#-----------------------------------------------------------------------
|
||||
def hostdev_xml(force_hostdev = false)
|
||||
def hostdev_xml(_opts = {})
|
||||
opts = {
|
||||
:force_hostdev => false,
|
||||
:pci => false
|
||||
}.merge(_opts)
|
||||
|
||||
prefix_old = @xpath_prefix
|
||||
@xpath_prefix = "TEMPLATE/PCI[ATTACH='YES']/"
|
||||
|
||||
@ -220,7 +236,7 @@ module VirtualMachineManagerKVM
|
||||
dev << xputs('<source><address uuid=%s/></source>', 'UUID')
|
||||
dev << '</hostdev>'
|
||||
else
|
||||
if force_hostdev
|
||||
if opts[:force_hostdev]
|
||||
is_vf = false
|
||||
else
|
||||
is_vf = vf?(@xml["#{@xpath_prefix}SHORT_ADDRESS"])
|
||||
@ -241,6 +257,22 @@ module VirtualMachineManagerKVM
|
||||
dev << xputs(' slot=%s', 'SLOT', :hex => true)
|
||||
dev << xputs(' function=%s', 'FUNCTION', :hex => true)
|
||||
dev << '/></source>'
|
||||
|
||||
#Setting Bus address needs to check that a PCI contoller is
|
||||
#present for Bus 1
|
||||
vm_addr = %w[VM_DOMAIN VM_BUS VM_SLOT VM_FUNCTION].all? {|e|
|
||||
exist? e
|
||||
}
|
||||
|
||||
if vm_addr && opts[:pci]
|
||||
dev << '<address type="pci"'
|
||||
dev << xputs(' domain=%s', 'VM_DOMAIN')
|
||||
dev << xputs(' bus=%s', 'VM_BUS')
|
||||
dev << xputs(' slot=%s', 'VM_SLOT')
|
||||
dev << xputs(' function=%s', 'VM_FUNCTION')
|
||||
dev << '/>'
|
||||
end
|
||||
|
||||
dev << dev_end
|
||||
end
|
||||
|
||||
|
@ -87,7 +87,6 @@ module VNMMAD::VirtualFunction
|
||||
# virtfn /sys/devices/pci0000:80/0000:80:03.2/0000:85:00.0/virtfn3
|
||||
# _vf /sys/devices/pci0000:80/0000:80:03.2/0000:85:02.3
|
||||
# rubocop:enable Layout/LineLength
|
||||
|
||||
m = virtfn.match(/virtfn([0-9]+)/)
|
||||
|
||||
next if m.nil?
|
||||
@ -102,6 +101,8 @@ module VNMMAD::VirtualFunction
|
||||
cmd = "#{command(:ip)} link set #{pf_dev} vf #{m[1]}"
|
||||
cmd << " mac #{pci[:mac]}" if pci[:mac]
|
||||
cmd << " vlan #{pci[:vlan_id]}" if pci[:vlan_id]
|
||||
cmd << " spoofchk #{on_off(pci[:spoofchk])}" if pci[:spoofchk]
|
||||
cmd << " trust #{on_off(pci[:trust])}" if pci[:trust]
|
||||
|
||||
OpenNebula.exec_and_log(cmd)
|
||||
end
|
||||
@ -109,5 +110,12 @@ module VNMMAD::VirtualFunction
|
||||
# rubocop:enable Style/CombinableLoops
|
||||
end
|
||||
|
||||
def on_off(option)
|
||||
if option.match(/^yes$|^on$/i)
|
||||
"on"
|
||||
else
|
||||
"off"
|
||||
end
|
||||
end
|
||||
end
|
||||
# rubocop:enable Style/ClassAndModuleChildren
|
||||
|
Loading…
Reference in New Issue
Block a user