1
0
mirror of https://github.com/OpenNebula/one.git synced 2025-03-12 08:58:17 +03:00

F #6841: NVIDIA vendor mediated devices framework

This commit adds support for the NVIDIA mdev framework introduced with the
Ubuntu 24.04 release. The change includes:

- Type of mdev is added by a new monitor attribute MDEV_MODE
  (<'legacy'|'nvidia'>). An empty MDEV_MODE in the PCI device defaults
  to 'legacy'
- Profile monitoring has also been adapted to the new framework.
- vgpu has been extended to prepare the vGPU prior to VM boot according
  to the new framework.
- KVM domain generates the PCI device using type='pci' and managed='no' to
  accommodate the new mdev interface.

Older OS/driver versions will use the legacy interface.

(cherry picked from commit 216c329b650a64034220f9fed5e5c5425ea8eabd)
This commit is contained in:
Ruben S. Montero 2025-01-11 16:46:26 +01:00
parent ef932b67bd
commit 0dfaa48041
No known key found for this signature in database
GPG Key ID: A0CEA6FA880A1D87
4 changed files with 152 additions and 46 deletions

View File

@ -200,7 +200,7 @@ void HostSharePCI::pci_attribute(VectorAttribute *device, PCIDevice *pci,
"ADDRESS", "SHORT_ADDRESS"
};
static vector<string> cp_check_attr = {"NUMA_NODE", "UUID"};
static vector<string> cp_check_attr = {"NUMA_NODE", "UUID", "MDEV_MODE"};
//Save previous address for migrations, clear on revert - failed migration
if (set_prev)
@ -523,7 +523,8 @@ int HostSharePCI::set_pci_address(VectorAttribute * pci_device,
// ------------------- Remove well-known attributes -----------------------
static vector<string> rm_attr = {"DOMAIN", "BUS", "SLOT", "FUNCTION",
"ADDRESS", "PREV_ADDRESS", "NUMA_NODE", "UUID"
"ADDRESS", "PREV_ADDRESS", "NUMA_NODE",
"UUID", "MDEV_MODE"
};
if (clean)

View File

@ -91,9 +91,61 @@ def get_devices(filter = nil)
end.flatten
end
# Builds the sysfs path of a device under /sys/bus/pci/devices.
#
# @param device [Hash] device description; only its :bus, :slot and
#   :function entries are read (the PCI domain is hard-coded to 0000)
# @return [String] "/sys/bus/pci/devices/0000:<bus>:<slot>.<function>"
def pci_bus_path(device)
    address = format('0000:%s:%s.%s',
                     device[:bus],
                     device[:slot],
                     device[:function])

    "/sys/bus/pci/devices/#{address}"
end
# Builds the sysfs path of a device under /sys/class/mdev_bus.
#
# @param device [Hash] device description; only its :bus, :slot and
#   :function entries are read (the PCI domain is hard-coded to 0000)
# @return [String] "/sys/class/mdev_bus/0000:<bus>:<slot>.<function>"
def mdev_bus_path(device)
    address = format('0000:%s:%s.%s',
                     device[:bus],
                     device[:slot],
                     device[:function])

    "/sys/class/mdev_bus/#{address}"
end
# Checks whether an entry called +attribute+ exists inside the PCI sysfs
# directory of the device.
#
# @param device [Hash] device description, as accepted by pci_bus_path
# @param attribute [String] name of the sysfs entry to look for
#   (e.g. 'physfn' or 'virtfn')
# @return [Boolean] true when the entry exists
def device_attr?(device, attribute)
    # A direct filesystem test replaces the former `ls -l ... | grep` pipeline:
    # no subshell is spawned per device and the attribute name is never
    # interpolated into a shell command.
    File.exist?(File.join(pci_bus_path(device), attribute))
end
# Tells whether the device exposes a 'physfn' entry in its PCI sysfs
# directory. The probe treats such NVIDIA devices as vGPU-capable virtual
# functions.
#
# @param device [Hash] device description, as accepted by device_attr?
# @return [Boolean] result of the 'physfn' lookup
def virtfn?(device)
    parent_link = 'physfn'

    device_attr?(device, parent_link)
end
# rubocop:disable Naming/PredicateName
# Tells whether the device exposes a 'virtfn' entry in its PCI sysfs
# directory. The probe skips NVIDIA cards for which this is true.
#
# @param device [Hash] device description, as accepted by device_attr?
# @return [Boolean] result of the 'virtfn' lookup
def has_virtfn?(device)
    child_link = 'virtfn'

    device_attr?(device, child_link)
end
# rubocop:enable Naming/PredicateName
# Tells whether the device is listed under /sys/class/mdev_bus. The probe
# reports MDEV_MODE 'legacy' for such devices and 'nvidia' otherwise.
#
# @param device [Hash] device description, as accepted by mdev_bus_path
# @return [Boolean] true when the mdev_bus entry of the device exists
def legacy?(device)
    mdev_dir = mdev_bus_path(device)

    File.exist?(mdev_dir)
end
# Lists the vGPU profiles offered through the legacy mdev interface, i.e.
# the entries of <mdev_bus_path>/mdev_supported_types.
#
# @param device [Hash] device description, as accepted by mdev_bus_path
# @return [Array<String>] profile names sorted alphabetically, one element
#   per profile; empty when the directory is missing or cannot be read
def legacy_profiles(device)
    path = File.join(mdev_bus_path(device), 'mdev_supported_types')

    return [] unless File.directory?(path)

    # Read the directory directly instead of parsing `ls` output. The previous
    # split('\n') used single quotes, so it never split on real newlines and
    # returned all profiles glued together in a single element.
    Dir.entries(path).reject {|entry| entry.start_with?('.') }.sort
rescue StandardError
    # Best effort: the monitoring probe must not fail on an unreadable sysfs
    []
end
# Lists the vGPU profiles offered through the NVIDIA vendor interface by
# parsing <pci_bus_path>/nvidia/creatable_vgpu_types. Each useful line is
# expected to look like "<id> : <name>"; a line whose id is "ID" (any case,
# presumably the header row) is skipped.
#
# @param device [Hash] device description, as accepted by pci_bus_path
# @return [Array<String>] entries formatted as "<id> (<name>)"; empty when
#   the file is missing or cannot be read
def nvidia_profiles(device)
    path = File.join(pci_bus_path(device), 'nvidia/creatable_vgpu_types')

    File.foreach(path).each_with_object([]) do |line, profiles|
        # Limit of 2: only the first ':' separates id from name, so a name
        # containing ':' is kept whole
        id, name = line.split(':', 2).map(&:strip)

        # Lines without a ':' (e.g. blank lines) yield no name. Skip them
        # instead of raising, which used to discard every parsed profile.
        next if id.nil? || id.empty? || name.nil? || name.empty?
        next if id.casecmp?('ID')

        profiles << "#{id} (#{name})"
    end
rescue StandardError
    # Best effort: the monitoring probe must not fail on an unreadable sysfs
    []
end
filter = CONF[:filter]
@ -117,8 +169,7 @@ devices.each do |dev|
end
# Skip NVIDIA cards with virtual functions
next if CONF[:nvidia_vendors].include?(dev[:vendor]) &&
device_attr?(dev, 'virtfn')
next if CONF[:nvidia_vendors].include?(dev[:vendor]) && has_virtfn?(dev)
puts 'PCI = ['
values = [
@ -139,26 +190,29 @@ devices.each do |dev|
# NVIDIA GPU device
if CONF[:nvidia_vendors].include?(dev[:vendor])
# When having NVIDIA GPU the name is always Device, so we merge
# it with vendor name, in this way Sunstone shows a better name
values << pval('DEVICE_NAME',
"#{dev[:vendor_name]} #{dev[:device_name]}")
# Better name for NVIDIA GPUs
values << pval('DEVICE_NAME', "#{dev[:vendor_name]} #{dev[:device_name]}")
# For vGPU, the uuid is based on the address to get always the same
if device_attr?(dev, 'physfn')
if virtfn?(dev)
# For vGPU, the uuid is based on the address to get always the same
values << pval(
'UUID',
`uuidgen --name '#{dev[:address]}' \
--namespace '@x500' --sha1`.strip
)
# Get profiles
addr = "0000:#{dev[:bus]}:#{dev[:slot]}.#{dev[:function]}"
profiles = `ls /sys/class/mdev_bus/#{addr}/mdev_supported_types`
profiles = profiles.split("\n")
# Comma separated value with different profiles
profiles = legacy_profiles(dev)
profiles = nvidia_profiles(dev) if profiles.empty?
values << pval('PROFILES', profiles.join(','))
mdev_mode = if legacy?(dev)
'legacy'
else
'nvidia'
end
values << pval('MDEV_MODE', mdev_mode)
end
else
values << pval('DEVICE_NAME', dev[:device_name])

View File

@ -2082,6 +2082,9 @@ int LibVirtDriver::deployment_description_kvm(
vm_func = pci[i]->vector_value("VM_FUNCTION");
string uuid = pci[i]->vector_value("UUID");
string mdev = pci[i]->vector_value("MDEV_MODE");
one_util::tolower(mdev);
if ( domain.empty() || bus.empty() || slot.empty() || func.empty() )
{
@ -2091,7 +2094,7 @@ int LibVirtDriver::deployment_description_kvm(
continue;
}
if ( !uuid.empty() )
if ( !uuid.empty() && (mdev == "legacy" || mdev.empty()) )
{
file << "\t\t<hostdev mode='subsystem' type='mdev' model='vfio-pci'>\n";
file << "\t\t\t<source>\n";
@ -2102,7 +2105,16 @@ int LibVirtDriver::deployment_description_kvm(
}
else
{
file << "\t\t<hostdev mode='subsystem' type='pci' managed='yes'>\n";
file << "\t\t<hostdev mode='subsystem' type='pci' ";
if ( mdev == "nvidia" )
{
file << "managed='no'>\n";
}
else
{
file << "managed='yes'>\n";
}
file << "\t\t\t<source>\n";
file << "\t\t\t\t<address "

View File

@ -32,7 +32,7 @@ function get_xpath_val() {
}
# Get mdev path used to (de)activate mediated device
function get_mdev_path() {
function vgpuctl() {
pci="$(xmllint --format --xpath "/VM/TEMPLATE/PCI[UUID='$1']" "$2" 2>/dev/null)"
# Get specific information about the PCI
@ -41,22 +41,71 @@ function get_mdev_path() {
slot=$(get_xpath_val "$pci" "/PCI/SLOT")
func=$(get_xpath_val "$pci" "/PCI/FUNCTION")
profile=$(get_xpath_val "$pci" "/PCI/PROFILE")
mode=$(get_xpath_val "$pci" "/PCI/MDEV_MODE")
# Generate mdev path
mdev="/sys/class/mdev_bus/$domain:$bus:$slot.$func"
if [[ "$mode" == "legacy" || -z "$mode" ]]; then
# Generate mdev path
mdev="/sys/class/mdev_bus/$domain:$bus:$slot.$func"
if [[ ! -d $mdev ]]
then
error_message "Directory '$mdev' does not exist"
exit 1
if [[ ! -d $mdev ]]; then
error_message "Directory '$mdev' does not exist"
exit 1
fi
if [ -z "$profile" ]; then
profile="$(ls "$mdev/mdev_supported_types" | head -n1)"
fi
mdev="$mdev/mdev_supported_types/$profile/"
case "$3" in
"create")
if ! echo "$1" > "$mdev/create"; then
error_message "Error creating mediated device"
exit 1
fi
;;
"remove")
if ! echo "1" > "$mdev/devices/$1/remove"; then
error_message "Error removing mediated device"
fi
;;
esac
else
pci="/sys/bus/pci/devices/$domain:$bus:$slot.$func"
if [[ ! -d "${pci}" ]]; then
error_message "Directory '$pci' does not exist"
exit 1
fi
ppath="${pci}/nvidia/creatable_vgpu_types"
if [ -z "$profile" ]; then
profile=$(sed -n '2p' ${ppath} | cut -f1 -d':' | tr -d '[:blank:]')
else
profile=${profile%% *}
if [[ "$3" == "create" ]] && ! grep -q "${profile}" ${ppath}; then
error_message "Profile '$profile' not supported by vGPU"
exit 1
fi
fi
case "$3" in
"create")
if ! echo "${profile}" > "${pci}/nvidia/current_vgpu_type"; then
error_message "Error activating vgpu with profile ${profile}"
exit 1
fi
;;
"remove")
if ! echo "0" > "${pci}/nvidia/current_vgpu_type"; then
error_message "Error deactivating vgpu"
fi
;;
esac
fi
if [ -z "$profile" ]
then
profile="$(ls "$mdev/mdev_supported_types" | head -n1)"
fi
echo "$mdev/mdev_supported_types/$profile/"
}
# ------------------------------------------------------------------------------
@ -80,27 +129,17 @@ case "$ACTION" in
if [ -n "$uuids" ]; then
for uuid in $uuids; do
mdev="$(get_mdev_path "$uuid" "$VM")"
if ! echo "$uuid" > "$mdev/create"; then
error_message "Error creating mediated device"
exit 1
fi
vgpuctl "$uuid" "$VM" "create"
done
fi
;;
"delete")
# Not exit with error, just log
uuids="$(get_uuids "$VM")"
if [ -n "$uuids" ]; then
for uuid in $uuids; do
mdev="$(get_mdev_path "$uuid" "$VM")"
if ! echo "1" > "$mdev/devices/$uuid/remove"; then
error_message "Error removing mediated device"
# Not exit with error, just log the error
# exit -1
fi
vgpuctl "$uuid" "$VM" "remove"
done
fi
;;