1
0
mirror of https://github.com/OpenNebula/one.git synced 2024-12-22 13:33:52 +03:00

F #5351: add support for NVIDIA vGPU (#1779)

This commit is contained in:
Alejandro Huertas Herrero 2022-02-17 15:51:27 +01:00 committed by GitHub
parent 4aa3e23a36
commit 7f719598bd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 229 additions and 29 deletions

View File

@ -1043,7 +1043,8 @@ RUBY_AUTH_LIB_FILES="src/authm_mad/remotes/ssh/ssh_auth.rb \
MAD_SH_LIB_FILES="src/mad/sh/scripts_common.sh \ MAD_SH_LIB_FILES="src/mad/sh/scripts_common.sh \
src/mad/sh/create_container_image.sh \ src/mad/sh/create_container_image.sh \
src/mad/sh/create_docker_image.sh" src/mad/sh/create_docker_image.sh \
src/vmm_mad/remotes/kvm/vgpu"
MAD_RUBY_LIB_FILES="src/mad/ruby/scripts_common.rb" MAD_RUBY_LIB_FILES="src/mad/ruby/scripts_common.rb"

View File

@ -10,6 +10,7 @@ Cmnd_Alias ONE_MARKET = /usr/lib/one/sh/create_container_image.sh, /usr/lib/one/
Cmnd_Alias ONE_NET = /usr/sbin/ebtables, /usr/sbin/iptables, /usr/sbin/ip6tables, /usr/sbin/ipset, /usr/sbin/ip link *, /usr/sbin/ip tuntap *, /usr/sbin/ip route *, /usr/sbin/ip neighbour * Cmnd_Alias ONE_NET = /usr/sbin/ebtables, /usr/sbin/iptables, /usr/sbin/ip6tables, /usr/sbin/ipset, /usr/sbin/ip link *, /usr/sbin/ip tuntap *, /usr/sbin/ip route *, /usr/sbin/ip neighbour *
Cmnd_Alias ONE_OVS = /usr/bin/ovs-ofctl, /usr/bin/ovs-vsctl Cmnd_Alias ONE_OVS = /usr/bin/ovs-ofctl, /usr/bin/ovs-vsctl
Cmnd_Alias ONE_MEM = /usr/sbin/sysctl vm.drop_caches=3 vm.compact_memory=1 Cmnd_Alias ONE_MEM = /usr/sbin/sysctl vm.drop_caches=3 vm.compact_memory=1
Cmnd_Alias ONE_VGPU = /usr/lib/one/sh/vgpu
## Command aliases are enabled individually in dedicated ## Command aliases are enabled individually in dedicated
## sudoers files by each OpenNebula component (server, node). ## sudoers files by each OpenNebula component (server, node).

View File

@ -11,6 +11,7 @@ Cmnd_Alias ONE_MARKET = /usr/lib/one/sh/create_container_image.sh, /usr/lib/one/
Cmnd_Alias ONE_NET = /sbin/ebtables, /sbin/iptables, /sbin/ip6tables, /sbin/ipset, /sbin/ip link *, /sbin/ip tuntap *, /sbin/ip route *, /sbin/ip neighbour * Cmnd_Alias ONE_NET = /sbin/ebtables, /sbin/iptables, /sbin/ip6tables, /sbin/ipset, /sbin/ip link *, /sbin/ip tuntap *, /sbin/ip route *, /sbin/ip neighbour *
Cmnd_Alias ONE_OVS = /usr/bin/ovs-ofctl, /usr/bin/ovs-vsctl Cmnd_Alias ONE_OVS = /usr/bin/ovs-ofctl, /usr/bin/ovs-vsctl
Cmnd_Alias ONE_MEM = /sbin/sysctl vm.drop_caches=3 vm.compact_memory=1 Cmnd_Alias ONE_MEM = /sbin/sysctl vm.drop_caches=3 vm.compact_memory=1
Cmnd_Alias ONE_VGPU = /usr/lib/one/sh/vgpu
## Command aliases are enabled individually in dedicated ## Command aliases are enabled individually in dedicated
## sudoers files by each OpenNebula component (server, node). ## sudoers files by each OpenNebula component (server, node).

View File

@ -1 +1 @@
oneadmin ALL=(ALL:ALL) NOPASSWD: ONE_CEPH, ONE_NET, ONE_OVS, ONE_LVM, ONE_MEM oneadmin ALL=(ALL:ALL) NOPASSWD: ONE_CEPH, ONE_NET, ONE_OVS, ONE_LVM, ONE_MEM, ONE_VGPU

View File

@ -17,7 +17,7 @@
# Holds configuration about sudoers requirements for OpenNebula # Holds configuration about sudoers requirements for OpenNebula
class Sudoers class Sudoers
NODECMDS = [:NET, :OVS, :LVM, :LXD, :MEM] NODECMDS = [:NET, :OVS, :LVM, :LXD, :MEM, :VGPU]
attr_accessor :cmds attr_accessor :cmds
@ -72,7 +72,8 @@ class Sudoers
lxc-create lxc-destroy lxc-info lxc-ls lxc-start lxc-stop lxc-create lxc-destroy lxc-info lxc-ls lxc-start lxc-stop
lxc-console e2fsck resize2fs xfs_growfs rbd-nbd lxc-console e2fsck resize2fs xfs_growfs rbd-nbd
], ],
:MEM => ['sysctl vm.drop_caches=3 vm.compact_memory=1'] :MEM => ['sysctl vm.drop_caches=3 vm.compact_memory=1'],
:VGPU => %w[/usr/lib/one/sh/vgpu]
} }
end end

View File

@ -130,7 +130,7 @@ bool HostSharePCI::test(const vector<VectorAttribute *> &devs) const
void HostSharePCI::add(vector<VectorAttribute *> &devs, int vmid) void HostSharePCI::add(vector<VectorAttribute *> &devs, int vmid)
{ {
unsigned int vendor_id, device_id, class_id; unsigned int vendor_id, device_id, class_id;
string address; string address, uuid;
int vendor_rc, device_rc, class_rc, addr_rc; int vendor_rc, device_rc, class_rc, addr_rc;
for (auto device : devs) for (auto device : devs)
@ -172,6 +172,13 @@ void HostSharePCI::add(vector<VectorAttribute *> &devs, int vmid)
device->replace("NUMA_NODE", node); device->replace("NUMA_NODE", node);
} }
uuid = dev->attrs->vector_value("UUID");
if ( !uuid.empty() )
{
device->replace("UUID", uuid);
}
break; break;
} }
} }

View File

@ -64,3 +64,8 @@
# - '^MegaRAID' # - '^MegaRAID'
# #
:device_name: [] :device_name: []
# List of NVIDIA vendor IDs. These are used to recognize PCI devices from
# NVIDIA and enable the vGPU feature for them
:nvidia_vendors:
- '10de'

View File

@ -25,9 +25,10 @@ begin
NAME = File.join(__dir__, "../../../../etc/im/#{ETC_NAME}/pci.conf") NAME = File.join(__dir__, "../../../../etc/im/#{ETC_NAME}/pci.conf")
CONF = { CONF = {
:filter => '0:0', :filter => '0:0',
:short_address => [], :short_address => [],
:device_name => [] :device_name => [],
:nvidia_vendors => ['10de']
}.merge(YAML.load_file(NAME)) }.merge(YAML.load_file(NAME))
rescue StandardError rescue StandardError
STDERR.puts "Invalid configuration #{NAME}" STDERR.puts "Invalid configuration #{NAME}"
@ -110,13 +111,18 @@ devices.each do |dev|
next if matched != true next if matched != true
end end
# The main device cannot be used, skip it
if CONF[:nvidia_vendors].include?(dev[:vendor]) &&
`ls /sys/class/mdev_bus | grep #{dev[:short_address]}`.empty?
next
end
puts 'PCI = [' puts 'PCI = ['
values = [ values = [
pval('TYPE', dev[:type]), pval('TYPE', dev[:type]),
pval('VENDOR', dev[:vendor]), pval('VENDOR', dev[:vendor]),
pval('VENDOR_NAME', dev[:vendor_name]), pval('VENDOR_NAME', dev[:vendor_name]),
pval('DEVICE', dev[:device]), pval('DEVICE', dev[:device]),
pval('DEVICE_NAME', dev[:device_name]),
pval('CLASS', dev[:class]), pval('CLASS', dev[:class]),
pval('CLASS_NAME', dev[:class_name]), pval('CLASS_NAME', dev[:class_name]),
pval('ADDRESS', dev[:address]), pval('ADDRESS', dev[:address]),
@ -128,6 +134,23 @@ devices.each do |dev|
pval('NUMA_NODE', dev[:numa_node]) pval('NUMA_NODE', dev[:numa_node])
] ]
# NVIDIA device
#
# The UUID is derived from the address so that it is always the same
if CONF[:nvidia_vendors].include?(dev[:vendor])
values << pval(
'UUID',
`uuidgen --name '#{dev[:address]}' --namespace '@x500' --sha1`.strip
)
# When having vGPU the name is always Device, so we merge it with vendor
# name, in this way Sunstone shows a better name
values << pval('DEVICE_NAME',
"#{dev[:vendor_name]} #{dev[:device_name]}")
else
values << pval('DEVICE_NAME', dev[:device_name])
end
puts values.join(",\n") puts values.join(",\n")
puts ']' puts ']'
end end

View File

@ -602,6 +602,8 @@ int LibVirtDriver::deployment_description_kvm(
string vm_slot = ""; string vm_slot = "";
string vm_func = ""; string vm_func = "";
string uuid = "";
bool pae = false; bool pae = false;
bool acpi = false; bool acpi = false;
bool apic = false; bool apic = false;
@ -796,7 +798,7 @@ int LibVirtDriver::deployment_description_kvm(
bool boot_secure = false; bool boot_secure = false;
string firmware; string firmware;
get_attribute(vm, nullptr, nullptr, "OS", "FIRMWARE", firmware); get_attribute(vm, nullptr, nullptr, "OS", "FIRMWARE", firmware);
bool is_uefi = !firmware.empty() && !one_util::icasecmp(firmware, "BIOS"); bool is_uefi = !firmware.empty() && !one_util::icasecmp(firmware, "BIOS");
@ -1788,6 +1790,8 @@ int LibVirtDriver::deployment_description_kvm(
vm_slot = pci[i]->vector_value("VM_SLOT"); vm_slot = pci[i]->vector_value("VM_SLOT");
vm_func = pci[i]->vector_value("VM_FUNCTION"); vm_func = pci[i]->vector_value("VM_FUNCTION");
uuid = pci[i]->vector_value("UUID");
if ( domain.empty() || bus.empty() || slot.empty() || func.empty() ) if ( domain.empty() || bus.empty() || slot.empty() || func.empty() )
{ {
vm->log("VMM", Log::WARNING, vm->log("VMM", Log::WARNING,
@ -1796,26 +1800,38 @@ int LibVirtDriver::deployment_description_kvm(
continue; continue;
} }
file << "\t\t<hostdev mode='subsystem' type='pci' managed='yes'>\n"; if ( !uuid.empty() )
file << "\t\t\t<source>\n";
file << "\t\t\t\t<address "
<< " domain=" << one_util::escape_xml_attr("0x" + domain)
<< " bus=" << one_util::escape_xml_attr("0x" + bus)
<< " slot=" << one_util::escape_xml_attr("0x" + slot)
<< " function=" << one_util::escape_xml_attr("0x" + func)
<< "/>\n";
file << "\t\t\t</source>\n";
if ( !vm_domain.empty() && !vm_bus.empty() && !vm_slot.empty() &&
!vm_func.empty() )
{ {
file << "\t\t\t\t<address type='pci'" file << "\t\t<hostdev mode='subsystem' type='mdev' model='vfio-pci'>\n";
<< " domain=" << one_util::escape_xml_attr(vm_domain) file << "\t\t\t<source>\n";
<< " bus=" << one_util::escape_xml_attr(vm_bus) file << "\t\t\t\t<address "
<< " slot=" << one_util::escape_xml_attr(vm_slot) << " uuid=" << one_util::escape_xml_attr(uuid)
<< " function=" << one_util::escape_xml_attr(vm_func) << "/>\n";
<< "/>\n"; file << "\t\t\t</source>\n";
}
else
{
file << "\t\t<hostdev mode='subsystem' type='pci' managed='yes'>\n";
file << "\t\t\t<source>\n";
file << "\t\t\t\t<address "
<< " domain=" << one_util::escape_xml_attr("0x" + domain)
<< " bus=" << one_util::escape_xml_attr("0x" + bus)
<< " slot=" << one_util::escape_xml_attr("0x" + slot)
<< " function=" << one_util::escape_xml_attr("0x" + func)
<< "/>\n";
file << "\t\t\t</source>\n";
if ( !vm_domain.empty() && !vm_bus.empty() && !vm_slot.empty() &&
!vm_func.empty() )
{
file << "\t\t\t\t<address type='pci'"
<< " domain=" << one_util::escape_xml_attr(vm_domain)
<< " bus=" << one_util::escape_xml_attr(vm_bus)
<< " slot=" << one_util::escape_xml_attr(vm_slot)
<< " function=" << one_util::escape_xml_attr(vm_func)
<< "/>\n";
}
} }
file << "\t\t</hostdev>" << endl; file << "\t\t</hostdev>" << endl;

View File

@ -25,10 +25,16 @@ TIMEOUT=60
function destroy_and_monitor function destroy_and_monitor
{ {
# Get datastore path to get vm.xml
DATASTORE="$(/usr/lib/one/sh/vgpu "DATASTORE" "$deploy_id" "$(dirname "$0")")"
virsh --connect $LIBVIRT_URI --readonly dominfo $deploy_id > /dev/null 2>&1 || return 0 virsh --connect $LIBVIRT_URI --readonly dominfo $deploy_id > /dev/null 2>&1 || return 0
virsh --connect $LIBVIRT_URI destroy $deploy_id virsh --connect $LIBVIRT_URI destroy $deploy_id
# Destroy vGPU
sudo /usr/lib/one/sh/vgpu "DELETE" "$DATASTORE/vm.xml" "$(dirname "$0")"
virsh --connect $LIBVIRT_URI --readonly dominfo $deploy_id > /dev/null 2>&1 virsh --connect $LIBVIRT_URI --readonly dominfo $deploy_id > /dev/null 2>&1
[ "x$?" != "x0" ] [ "x$?" != "x0" ]
} }

View File

@ -36,6 +36,9 @@ if [ -n "${nvram}" ]; then
cp -n "${OVMF_NVRAM}" "${nvram}" cp -n "${OVMF_NVRAM}" "${nvram}"
fi fi
# Create vGPU following NVIDIA official guide: https://docs.nvidia.com/grid/latest/pdf/grid-vgpu-user-guide.pdf
sudo /usr/lib/one/sh/vgpu "CREATE" "$DEP_FILE_LOCATION/vm.xml" "$(dirname "$0")"
DATA=`virsh --connect $LIBVIRT_URI create $DEP_FILE` DATA=`virsh --connect $LIBVIRT_URI create $DEP_FILE`
if [ "x$?" = "x0" ]; then if [ "x$?" = "x0" ]; then

View File

@ -23,6 +23,9 @@ count=0
deploy_id=$1 deploy_id=$1
# Get datastore path to get vm.xml
DATASTORE="$(/usr/lib/one/sh/vgpu "DATASTORE" "$deploy_id" "$(dirname "$0")")"
shutdown_command="virsh --connect $LIBVIRT_URI shutdown $deploy_id" shutdown_command="virsh --connect $LIBVIRT_URI shutdown $deploy_id"
# Check if the domain is already shutdown # Check if the domain is already shutdown
@ -70,6 +73,9 @@ retry $TIMEOUT monitor
force_shutdown "$deploy_id" \ force_shutdown "$deploy_id" \
"virsh --connect $LIBVIRT_URI destroy $deploy_id" "virsh --connect $LIBVIRT_URI destroy $deploy_id"
# Destroy vGPU
sudo /usr/lib/one/sh/vgpu "DELETE" "$DATASTORE/vm.xml" "$(dirname "$0")"
# Compact memory # Compact memory
if [ "x$CLEANUP_MEMORY_ON_STOP" = "xyes" ]; then if [ "x$CLEANUP_MEMORY_ON_STOP" = "xyes" ]; then
sudo -n sysctl vm.drop_caches=3 vm.compact_memory=1 &>/dev/null & sudo -n sysctl vm.drop_caches=3 vm.compact_memory=1 &>/dev/null &

130
src/vmm_mad/remotes/kvm/vgpu Executable file
View File

@ -0,0 +1,130 @@
#!/bin/bash
# -------------------------------------------------------------------------- #
# Copyright 2002-2022, OpenNebula Project, OpenNebula Systems #
# #
# Licensed under the Apache License, Version 2.0 (the "License"); you may #
# not use this file except in compliance with the License. You may obtain #
# a copy of the License at #
# #
# http://www.apache.org/licenses/LICENSE-2.0 #
# #
# Unless required by applicable law or agreed to in writing, software #
# distributed under the License is distributed on an "AS IS" BASIS, #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and #
# limitations under the License. #
#--------------------------------------------------------------------------- #
# ------------------------------------------------------------------------------
# HELPERS
# ------------------------------------------------------------------------------
# Get PCI devices UUID
#
# Arguments:
#   $1 - path to the VM XML file (vm.xml)
#
# Output:
#   Prints the text of every /VM/TEMPLATE/PCI/UUID node with the CDATA
#   wrappers stripped. The closing CDATA marker is replaced by a space (not
#   removed) so that several UUIDs remain separate words even if xmllint
#   prints the matching nodes back to back; callers iterate over the result
#   using word splitting. xmllint errors are discarded, in which case only an
#   empty line is printed.
function get_uuids() {
    # Keep the intermediate value out of the caller's global namespace
    local raw

    raw="$(xmllint --format --xpath '/VM/TEMPLATE/PCI/UUID/text()' "$1" 2>/dev/null)"

    echo "$raw" | sed -e 's/<!\[CDATA\[//g; s/\]\]>/ /g'
}
# Extract the text of one XML element and drop the CDATA markers around it
#
# Arguments:
#   $1 - XML document, given as a string (fed to xmllint through stdin)
#   $2 - XPath of the element; "/text()" is appended before evaluation
#
# Output:
#   Whatever xmllint prints for the expression, minus any "<![CDATA[" and
#   "]]>" sequences
function get_xpath_val() {
    local xml="$1"
    local query="$2/text()"

    echo "$xml" |
        xmllint --format --xpath "$query" - |
        sed -e 's/<!\[CDATA\[//g' -e 's/\]\]>//g'
}
# Get mdev path used to (de)activate mediated device
#
# Arguments:
#   $1 - UUID of the PCI device, matched against /VM/TEMPLATE/PCI/UUID
#   $2 - path to the VM XML file (vm.xml)
#
# Output:
#   Prints "/sys/class/mdev_bus/<domain>:<bus>:<slot>.<func>/mdev_supported_types/<type>/"
#   where <type> is the first entry listed under mdev_supported_types.
#
# Exit status:
#   Calls "exit 1" (after logging through error_message) when the mdev bus
#   directory does not exist or when no supported type is found. The callers
#   in this script run it inside $(...), so the exit ends only that subshell
#   and is seen by the caller as the status of the command substitution.
function get_mdev_path() {
    # Keep every working variable local so a direct call cannot clobber the
    # caller's variables of the same name (e.g. "mdev")
    local pci domain bus slot func mdev device

    pci="$(xmllint --format --xpath "/VM/TEMPLATE/PCI[UUID='$1']" "$2" 2>/dev/null)"

    # Get specific information about the PCI
    domain=$(get_xpath_val "$pci" "/PCI/DOMAIN")
    bus=$(get_xpath_val "$pci" "/PCI/BUS")
    slot=$(get_xpath_val "$pci" "/PCI/SLOT")
    func=$(get_xpath_val "$pci" "/PCI/FUNCTION")

    # Generate mdev path
    mdev="/sys/class/mdev_bus/$domain:$bus:$slot.$func"

    if [[ ! -d $mdev ]]
    then
        error_message "Directory '$mdev' does not exist"
        exit 1
    fi

    # TODO: give the user the ability to choose this
    device="$(ls "$mdev/mdev_supported_types" 2>/dev/null | head -n1)"

    # Without a type the resulting path would end in "mdev_supported_types//",
    # which is not a usable mediated device type directory
    if [[ -z $device ]]
    then
        error_message "No supported mediated device type found in '$mdev'"
        exit 1
    fi

    echo "$mdev/mdev_supported_types/$device/"
}
# ------------------------------------------------------------------------------
# ------------------------------------------------------------------------------
# Requested action, lower-cased so that e.g. "CREATE" and "create" are the same
ACTION=${1,,}

# The meaning of the second argument depends on the action:
#   create    -> vm.xml path
#   delete    -> vm.xml path
#   datastore -> vm deploy ID
VM="$2"

# Variables from driver
DRIVER_PATH="$3"
XPATH="${DRIVER_PATH}/../../datastore/xpath.rb --stdin"

# NOTE(review): both files are sourced from a location derived from the third
# command line argument; when this script is run through sudo that argument
# decides which code gets sourced - confirm callers can be trusted with it.
source "$DRIVER_PATH/../../etc/vmm/kvm/kvmrc"
source "$DRIVER_PATH/../../scripts_common.sh"

case "$ACTION" in
    "create")
        uuids="$(get_uuids "$VM")"

        if [ -n "$uuids" ]; then
            for uuid in $uuids; do
                # get_mdev_path runs in a subshell, so its "exit 1" does not
                # stop this script. Check the result explicitly: with an
                # empty path the redirection below would target "/create".
                if ! mdev="$(get_mdev_path "$uuid" "$VM")" || [ -z "$mdev" ]; then
                    error_message "Error creating mediated device"
                    exit 1
                fi

                if ! echo "$uuid" > "$mdev/create"; then
                    error_message "Error creating mediated device"
                    exit 1
                fi
            done
        fi
        ;;
    "delete")
        uuids="$(get_uuids "$VM")"

        if [ -n "$uuids" ]; then
            for uuid in $uuids; do
                # Same check as in "create"; removal stays best effort, so
                # the problem is only logged and the next UUID is processed
                if ! mdev="$(get_mdev_path "$uuid" "$VM")" || [ -z "$mdev" ]; then
                    error_message "Error removing mediated device"
                    continue
                fi

                if ! echo "1" > "$mdev/devices/$uuid/remove"; then
                    error_message "Error removing mediated device"

                    # Not exit with error, just log the error
                    # exit -1
                fi
            done
        fi
        ;;
    "datastore")
        # Read the metadata stored in the libvirt domain and print the value
        # found under /vm/system_datastore/
        METADATA_XML="$(virsh --connect "$LIBVIRT_URI" metadata "$VM" "$LIBVIRT_MD_URI" "$LIBVIRT_MD_KEY")"

        # The xpath helper output is read as NUL separated elements
        unset i XPATH_ELEMENTS

        while IFS= read -r -d '' element; do
            XPATH_ELEMENTS[i++]="$element"
        done < <(echo "$METADATA_XML" | $XPATH /vm/system_datastore/)

        unset i

        DATASTORE_PATH="${XPATH_ELEMENTS[i++]}"

        if [ -z "$DATASTORE_PATH" ]; then
            error_message "Datastore path not found"
            exit 1
        fi

        echo "$DATASTORE_PATH"
        ;;
    *)
        error_message "Unsupported action '$ACTION'"
        exit 1
esac