From 9009d3b5c3b6d191be69215736be77583e0f23f9 Mon Sep 17 00:00:00 2001 From: Stuart Hayes Date: Tue, 16 Jan 2018 16:08:10 -0500 Subject: [PATCH 1/4] udev: net_id: search parent devices for PCI slot number To generate predictable network device names, the code in udev-builting-net_id.c tries to match the PCI device address of the network device to the entries in /sys/bus/pci/slots. However, sometimes the slot number is not associated the network controller PCI device itself, but rather with one of its parents. This change will try to find a match in /sys/bus/pci/slots for the parents of the PCI network device, if it doesn't find a match for the device itself. --- src/udev/udev-builtin-net_id.c | 43 ++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/src/udev/udev-builtin-net_id.c b/src/udev/udev-builtin-net_id.c index 6efa7129303..3d3329b3824 100644 --- a/src/udev/udev-builtin-net_id.c +++ b/src/udev/udev-builtin-net_id.c @@ -112,6 +112,7 @@ #include "dirent-util.h" #include "fd-util.h" #include "fileio.h" +#include "parse-util.h" #include "stdio-util.h" #include "string-util.h" #include "udev.h" @@ -233,15 +234,15 @@ static bool is_pci_multifunction(struct udev_device *dev) { static int dev_pci_slot(struct udev_device *dev, struct netnames *names) { struct udev *udev = udev_device_get_udev(names->pcidev); - unsigned domain, bus, slot, func, dev_port = 0; + unsigned domain, bus, slot, func, dev_port = 0, hotplug_slot = 0; size_t l; char *s; const char *attr, *port_name; _cleanup_udev_device_unref_ struct udev_device *pci = NULL; + struct udev_device *hotplug_slot_dev; char slots[PATH_MAX]; _cleanup_closedir_ DIR *dir = NULL; struct dirent *dent; - int hotplug_slot = 0; if (sscanf(udev_device_get_sysname(names->pcidev), "%x:%x:%x.%u", &domain, &bus, &slot, &func) != 4) return -ENOENT; @@ -281,27 +282,33 @@ static int dev_pci_slot(struct udev_device *dev, struct netnames *names) { if (!dir) return -errno; - FOREACH_DIRENT_ALL(dent, dir, break) { - int i; - char *rest, str[PATH_MAX]; - _cleanup_free_ char *address = NULL; + hotplug_slot_dev = names->pcidev; + while (hotplug_slot_dev) { + FOREACH_DIRENT_ALL(dent, dir, break) { + unsigned i; + int r; + char str[PATH_MAX]; + _cleanup_free_ char *address = NULL; - if (dent->d_name[0] == '.') - continue; - i = strtol(dent->d_name, &rest, 10); - if (rest[0] != '\0') - continue; - if (i < 1) - continue; + if (dent->d_name[0] == '.') + continue; + r = safe_atou_full(dent->d_name, 10, &i); + if (i < 1 || r < 0) + continue; - if (snprintf_ok(str, sizeof str, "%s/%s/address", slots, dent->d_name) && - read_one_line_file(str, &address) >= 0) - /* match slot address with device by stripping the function */ - if (startswith(udev_device_get_sysname(names->pcidev), address)) - hotplug_slot = i; + if (snprintf_ok(str, sizeof str, "%s/%s/address", slots, dent->d_name) && + read_one_line_file(str, &address) >= 0) + /* match slot address with device by stripping the function */ + if (startswith(udev_device_get_sysname(hotplug_slot_dev), address)) + hotplug_slot = i; + if (hotplug_slot > 0) + break; + } if (hotplug_slot > 0) break; + rewinddir(dir); + hotplug_slot_dev = udev_device_get_parent_with_subsystem_devtype(hotplug_slot_dev, "pci", NULL); } if (hotplug_slot > 0) { From 609948c7043a40008b8299529c978ed8e11de8f6 Mon Sep 17 00:00:00 2001 From: Stuart Hayes Date: Wed, 17 Jan 2018 14:31:55 -0500 Subject: [PATCH 2/4] udev: net_id: Improve predictable names for SR-IOV virtual devices With PCI SR-IOV, a number of virtual network devices can be enabled, all of which share the same physical network device. Currently, udev generates names for SR-IOV virtual functions as if they were independent network devices. With this change, the predictable network device naming code will check if a network device is an SR-IOV virtual device, and will generate a name based on the physical PCI device plus a "v%u" suffix. This should improve readability and predictability of device names. Here is an example of how this change would affect naming: before patch | after patch ----------------------------- eno1 | eno1 onboard NIC, physical function enp101s0f0 | eno1v0 onboard NIC, SR-IOV virtual func 0 enp101s0f1 | eno1v1 onboard NIC, SR-IOV virtual func 1 --- src/udev/udev-builtin-net_id.c | 94 +++++++++++++++++++++++++++++++++- 1 file changed, 92 insertions(+), 2 deletions(-) diff --git a/src/udev/udev-builtin-net_id.c b/src/udev/udev-builtin-net_id.c index 3d3329b3824..233bdd2646d 100644 --- a/src/udev/udev-builtin-net_id.c +++ b/src/udev/udev-builtin-net_id.c @@ -112,6 +112,7 @@ #include "dirent-util.h" #include "fd-util.h" #include "fileio.h" +#include "fs-util.h" #include "parse-util.h" #include "stdio-util.h" #include "string-util.h" @@ -150,6 +151,11 @@ struct netnames { char platform_path[IFNAMSIZ]; }; +struct virtfn_info { + struct udev_device *physfn_pcidev; + char suffix[IFNAMSIZ]; +}; + /* skip intermediate virtio devices */ static struct udev_device *skip_virtio(struct udev_device *dev) { struct udev_device *parent = dev; @@ -162,6 +168,67 @@ static struct udev_device *skip_virtio(struct udev_device *dev) { return parent; } +static int get_virtfn_info(struct udev_device *dev, struct netnames *names, struct virtfn_info *vf_info) { + struct udev *udev; + const char *physfn_link_file; + _cleanup_free_ char *physfn_pci_syspath = NULL; + _cleanup_free_ char *virtfn_pci_syspath = NULL; + struct dirent *dent; + _cleanup_closedir_ DIR *dir = NULL; + struct virtfn_info vf_info_local = {}; + int r; + + udev = udev_device_get_udev(names->pcidev); + if (!udev) + return -ENOENT; + /* Check if this is a virtual function. */ + physfn_link_file = strjoina(udev_device_get_syspath(names->pcidev), "/physfn"); + r = chase_symlinks(physfn_link_file, NULL, 0, &physfn_pci_syspath); + if (r < 0) + return r; + + /* Get physical function's pci device. */ + vf_info_local.physfn_pcidev = udev_device_new_from_syspath(udev, physfn_pci_syspath); + if (!vf_info_local.physfn_pcidev) + return -ENOENT; + + /* Find the virtual function number by finding the right virtfn link. */ + dir = opendir(physfn_pci_syspath); + if (!dir) { + r = -errno; + goto out_unref; + } + FOREACH_DIRENT_ALL(dent, dir, break) { + _cleanup_free_ char *virtfn_link_file = NULL; + if (!startswith(dent->d_name, "virtfn")) + continue; + virtfn_link_file = strjoin(physfn_pci_syspath, "/", dent->d_name); + if (!virtfn_link_file) { + r = -ENOMEM; + goto out_unref; + } + if (chase_symlinks(virtfn_link_file, NULL, 0, &virtfn_pci_syspath) < 0) + continue; + if (streq(udev_device_get_syspath(names->pcidev), virtfn_pci_syspath)) { + if (!snprintf_ok(vf_info_local.suffix, sizeof(vf_info_local.suffix), "v%s", &dent->d_name[6])) { + r = -ENOENT; + goto out_unref; + } + break; + } + } + if (isempty(vf_info_local.suffix)) { + r = -ENOENT; + goto out_unref; + } + *vf_info = vf_info_local; + return 0; + +out_unref: + udev_device_unref(vf_info_local.physfn_pcidev); + return r; +} + /* retrieve on-board index number and label from firmware */ static int dev_pci_onboard(struct udev_device *dev, struct netnames *names) { unsigned dev_port = 0; @@ -413,6 +480,8 @@ static int names_platform(struct udev_device *dev, struct netnames *names, bool static int names_pci(struct udev_device *dev, struct netnames *names) { struct udev_device *parent; + struct netnames vf_names = {}; + struct virtfn_info vf_info = {}; assert(dev); assert(names); @@ -433,8 +502,29 @@ static int names_pci(struct udev_device *dev, struct netnames *names) { if (!names->pcidev) return -ENOENT; } - dev_pci_onboard(dev, names); - dev_pci_slot(dev, names); + + if (get_virtfn_info(dev, names, &vf_info) >= 0) { + /* If this is an SR-IOV virtual device, get base name using physical device and add virtfn suffix. */ + vf_names.pcidev = vf_info.physfn_pcidev; + dev_pci_onboard(dev, &vf_names); + dev_pci_slot(dev, &vf_names); + if (vf_names.pci_onboard[0]) + if (strlen(vf_names.pci_onboard) + strlen(vf_info.suffix) < sizeof(names->pci_onboard)) + strscpyl(names->pci_onboard, sizeof(names->pci_onboard), + vf_names.pci_onboard, vf_info.suffix, NULL); + if (vf_names.pci_slot[0]) + if (strlen(vf_names.pci_slot) + strlen(vf_info.suffix) < sizeof(names->pci_slot)) + strscpyl(names->pci_slot, sizeof(names->pci_slot), + vf_names.pci_slot, vf_info.suffix, NULL); + if (vf_names.pci_path[0]) + if (strlen(vf_names.pci_path) + strlen(vf_info.suffix) < sizeof(names->pci_path)) + strscpyl(names->pci_path, sizeof(names->pci_path), + vf_names.pci_path, vf_info.suffix, NULL); + udev_device_unref(vf_info.physfn_pcidev); + } else { + dev_pci_onboard(dev, names); + dev_pci_slot(dev, names); + } return 0; } From 6bc04997b6eab35d1cb9fa73889892702c27be09 Mon Sep 17 00:00:00 2001 From: Stuart Hayes Date: Thu, 18 Jan 2018 15:14:56 -0500 Subject: [PATCH 3/4] udev: net_id: Improve predictable names for NPAR devices NPAR is a technology that allows a single network interface to be divided into number of partitions. The partitions show up as functions on the same PCI device... when there are more than 8 functions, ARI (alternative routing-ID interpretation) is used. With ARI is enabled, the 8 bit field that normally has 5 bits for the PCI device and 3 bits for the PCI function is instead interpreted as (implicit) device 0, with 8 bits for the function number. Because the linux kernel exposes the PCI device/function numbers to userspace the same regardless of whether ARI is enabled, systemd predictable device naming can generate unpredictable names in this case, because network names using the PCI slot use the function number, but not the device number, causing systemd to generate the same name for mulitple network devices (so some will revert to the "ethX" names). With this patch, device naming code checks if ARI is enabled for a PCI network device, and uses the full 8-bit function number for naming to avoid this situation. This should improve readability and predictability of device names. Here is an example of how this change would affect naming: before patch | after patch ----------------------------- ens2f0 | ens2f0 NPAR partition 0 (in PCI slot 2) ens2f1 | ens2f1 NPAR partition 1 ... ens2f7 | ens2f7 NPAR partition 7 eth1 | ens2f8 NPAR partition 8 eth2 | ens2f9 NPAR partition 9 --- src/udev/udev-builtin-net_id.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/udev/udev-builtin-net_id.c b/src/udev/udev-builtin-net_id.c index 233bdd2646d..54d2ea0aaa6 100644 --- a/src/udev/udev-builtin-net_id.c +++ b/src/udev/udev-builtin-net_id.c @@ -299,6 +299,10 @@ static bool is_pci_multifunction(struct udev_device *dev) { return false; } +static bool is_pci_ari_enabled(struct udev_device *dev) { + return !!udev_device_get_sysattr_value(dev, "ari_enabled"); +} + static int dev_pci_slot(struct udev_device *dev, struct netnames *names) { struct udev *udev = udev_device_get_udev(names->pcidev); unsigned domain, bus, slot, func, dev_port = 0, hotplug_slot = 0; @@ -313,6 +317,11 @@ static int dev_pci_slot(struct udev_device *dev, struct netnames *names) { if (sscanf(udev_device_get_sysname(names->pcidev), "%x:%x:%x.%u", &domain, &bus, &slot, &func) != 4) return -ENOENT; + if (is_pci_ari_enabled(names->pcidev)) + /* ARI devices support up to 256 functions on a single device ("slot"), and interpret the + * traditional 5-bit slot and 3-bit function number as a single 8-bit function number, + * where the slot makes up the upper 5 bits. */ + func += slot * 8; /* kernel provided port index for multiple ports on a single PCI function */ attr = udev_device_get_sysattr_value(dev, "dev_port"); From 019cb3abf0760e9dd597b807ed8785671f74f305 Mon Sep 17 00:00:00 2001 From: Stuart Hayes Date: Wed, 14 Feb 2018 15:44:47 -0500 Subject: [PATCH 4/4] NEWS: Warn about predictable network naming changes --- NEWS | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/NEWS b/NEWS index 52724587ede..b05f601c38e 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,14 @@ systemd System and Service Manager +CHANGES WITH 239 in spe: + + * NETWORK INTERFACE DEVICE NAMING CHANGES: systemd-udevd's "net_id" + builtin may name network interfaces differently than in previous + versions. SR-IOV virtual functions and NPAR partitions with PCI + function numbers of 8 and above will be named more predictably, + and udev may generate names based on PCI slot number in some cases + where it previously did not. + CHANGES WITH 238: * The MemoryAccounting= unit property now defaults to on. After