1
0
mirror of git://sourceware.org/git/lvm2.git synced 2024-12-21 13:34:40 +03:00

filter-mpath: work with nvme devices

Recognize when a device is nvme, and apply filter-mpath to
nvme devices in addition to scsi devices.
This commit is contained in:
David Teigland 2020-12-03 10:48:21 -06:00
parent 48dfc388f7
commit bee9f4efdd
4 changed files with 178 additions and 64 deletions

View File

@ -21,6 +21,7 @@
#include "lib/metadata/metadata.h" #include "lib/metadata/metadata.h"
#include "lib/device/bcache.h" #include "lib/device/bcache.h"
#include "lib/label/label.h" #include "lib/label/label.h"
#include "lib/commands/toolcontext.h"
#ifdef BLKID_WIPING_SUPPORT #ifdef BLKID_WIPING_SUPPORT
#include <blkid.h> #include <blkid.h>
@ -67,6 +68,31 @@ int dev_is_pmem(struct device *dev)
return is_pmem ? 1 : 0; return is_pmem ? 1 : 0;
} }
/*
* An nvme device has major number 259 (BLKEXT), minor number <minor>,
* and reading /sys/dev/block/259:<minor>/device/dev shows a character
* device cmajor:cminor where cmajor matches the major number of the
* nvme character device entry in /proc/devices. Checking all of that
* is excessive and unnecessary compared to just comparing /dev/name*.
*/
int dev_is_nvme(struct dev_types *dt, struct device *dev)
{
struct dm_str_list *strl;
if (dev->flags & DEV_IS_NVME)
return 1;
dm_list_iterate_items(strl, &dev->aliases) {
if (!strncmp(strl->str, "/dev/nvme", 9)) {
log_debug("Found nvme device %s", dev_name(dev));
dev->flags |= DEV_IS_NVME;
return 1;
}
}
return 0;
}
int dev_is_lv(struct device *dev) int dev_is_lv(struct device *dev)
{ {
FILE *fp; FILE *fp;
@ -302,6 +328,9 @@ int dev_subsystem_part_major(struct dev_types *dt, struct device *dev)
const char *dev_subsystem_name(struct dev_types *dt, struct device *dev) const char *dev_subsystem_name(struct dev_types *dt, struct device *dev)
{ {
if (dev->flags & DEV_IS_NVME)
return "NVME";
if (MAJOR(dev->dev) == dt->device_mapper_major) if (MAJOR(dev->dev) == dt->device_mapper_major)
return "DM"; return "DM";
@ -348,7 +377,6 @@ int major_is_scsi_device(struct dev_types *dt, int major)
return (dt->dev_type_array[major].flags & PARTITION_SCSI_DEVICE) ? 1 : 0; return (dt->dev_type_array[major].flags & PARTITION_SCSI_DEVICE) ? 1 : 0;
} }
static int _loop_is_with_partscan(struct device *dev) static int _loop_is_with_partscan(struct device *dev)
{ {
FILE *fp; FILE *fp;
@ -398,6 +426,28 @@ struct partition {
uint32_t nr_sects; uint32_t nr_sects;
} __attribute__((packed)); } __attribute__((packed));
static int _has_sys_partition(struct device *dev)
{
char path[PATH_MAX];
struct stat info;
int major = (int) MAJOR(dev->dev);
int minor = (int) MINOR(dev->dev);
/* check if dev is a partition */
if (dm_snprintf(path, sizeof(path), "%s/dev/block/%d:%d/partition",
dm_sysfs_dir(), major, minor) < 0) {
log_error("dm_snprintf partition failed");
return 0;
}
if (stat(path, &info) == -1) {
if (errno != ENOENT)
log_sys_error("stat", path);
return 0;
}
return 1;
}
static int _is_partitionable(struct dev_types *dt, struct device *dev) static int _is_partitionable(struct dev_types *dt, struct device *dev)
{ {
int parts = major_max_partitions(dt, MAJOR(dev->dev)); int parts = major_max_partitions(dt, MAJOR(dev->dev));
@ -414,6 +464,13 @@ static int _is_partitionable(struct dev_types *dt, struct device *dev)
_loop_is_with_partscan(dev)) _loop_is_with_partscan(dev))
return 1; return 1;
if (dev_is_nvme(dt, dev)) {
/* If this dev is already a partition then it's not partitionable. */
if (_has_sys_partition(dev))
return 0;
return 1;
}
if ((parts <= 1) || (MINOR(dev->dev) % parts)) if ((parts <= 1) || (MINOR(dev->dev) % parts))
return 0; return 0;
@ -557,10 +614,17 @@ int dev_get_primary_dev(struct dev_types *dt, struct device *dev, dev_t *result)
char path[PATH_MAX]; char path[PATH_MAX];
char temp_path[PATH_MAX]; char temp_path[PATH_MAX];
char buffer[64]; char buffer[64];
struct stat info;
FILE *fp = NULL; FILE *fp = NULL;
int parts, residue, size, ret = 0; int parts, residue, size, ret = 0;
/*
* /dev/nvme devs don't use the major:minor numbering like
* block dev types that have their own major number, so
* the calculation based on minor number doesn't work.
*/
if (dev_is_nvme(dt, dev))
goto sys_partition;
/* /*
* Try to get the primary dev out of the * Try to get the primary dev out of the
* list of known device types first. * list of known device types first.
@ -576,23 +640,14 @@ int dev_get_primary_dev(struct dev_types *dt, struct device *dev, dev_t *result)
goto out; goto out;
} }
sys_partition:
/* /*
* If we can't get the primary dev out of the list of known device * If we can't get the primary dev out of the list of known device
* types, try to look at sysfs directly then. This is more complex * types, try to look at sysfs directly then. This is more complex
* way and it also requires certain sysfs layout to be present * way and it also requires certain sysfs layout to be present
* which might not be there in old kernels! * which might not be there in old kernels!
*/ */
if (!_has_sys_partition(dev)) {
/* check if dev is a partition */
if (dm_snprintf(path, sizeof(path), "%s/dev/block/%d:%d/partition",
sysfs_dir, major, minor) < 0) {
log_error("dm_snprintf partition failed");
goto out;
}
if (stat(path, &info) == -1) {
if (errno != ENOENT)
log_sys_error("stat", path);
*result = dev->dev; *result = dev->dev;
ret = 1; ret = 1;
goto out; /* dev is not a partition! */ goto out; /* dev is not a partition! */

View File

@ -95,6 +95,8 @@ int dev_is_rotational(struct dev_types *dt, struct device *dev);
int dev_is_pmem(struct device *dev); int dev_is_pmem(struct device *dev);
int dev_is_nvme(struct dev_types *dt, struct device *dev);
int dev_is_lv(struct device *dev); int dev_is_lv(struct device *dev);
int get_fs_block_size(struct device *dev, uint32_t *fs_block_size); int get_fs_block_size(struct device *dev, uint32_t *fs_block_size);

View File

@ -38,6 +38,7 @@
#define DEV_SCAN_FOUND_LABEL 0x00010000 /* label scan read dev and found label */ #define DEV_SCAN_FOUND_LABEL 0x00010000 /* label scan read dev and found label */
#define DEV_IS_MD_COMPONENT 0x00020000 /* device is an md component */ #define DEV_IS_MD_COMPONENT 0x00020000 /* device is an md component */
#define DEV_UDEV_INFO_MISSING 0x00040000 /* we have no udev info for this device */ #define DEV_UDEV_INFO_MISSING 0x00040000 /* we have no udev info for this device */
#define DEV_IS_NVME 0x00080000 /* set if dev is nvme */
/* /*
* Support for external device info. * Support for external device info.

View File

@ -16,6 +16,7 @@
#include "lib/misc/lib.h" #include "lib/misc/lib.h"
#include "lib/filters/filter.h" #include "lib/filters/filter.h"
#include "lib/activate/activate.h" #include "lib/activate/activate.h"
#include "lib/commands/toolcontext.h"
#ifdef UDEV_SYNC_SUPPORT #ifdef UDEV_SYNC_SUPPORT
#include <libudev.h> #include <libudev.h>
#include "lib/device/dev-ext-udev-constants.h" #include "lib/device/dev-ext-udev-constants.h"
@ -27,7 +28,6 @@
#define MPATH_PREFIX "mpath-" #define MPATH_PREFIX "mpath-"
struct mpath_priv { struct mpath_priv {
struct dm_pool *mem; struct dm_pool *mem;
struct dev_filter f; struct dev_filter f;
@ -35,6 +35,9 @@ struct mpath_priv {
struct dm_hash_table *hash; struct dm_hash_table *hash;
}; };
/*
* given "/dev/foo" return "foo"
*/
static const char *_get_sysfs_name(struct device *dev) static const char *_get_sysfs_name(struct device *dev)
{ {
const char *name; const char *name;
@ -53,6 +56,11 @@ static const char *_get_sysfs_name(struct device *dev)
return name; return name;
} }
/*
* given major:minor
* readlink translates /sys/dev/block/major:minor to /sys/.../foo
* from /sys/.../foo return "foo"
*/
static const char *_get_sysfs_name_by_devt(const char *sysfs_dir, dev_t devno, static const char *_get_sysfs_name_by_devt(const char *sysfs_dir, dev_t devno,
char *buf, size_t buf_size) char *buf, size_t buf_size)
{ {
@ -102,27 +110,28 @@ static int _get_sysfs_string(const char *path, char *buffer, int max_size)
return r; return r;
} }
static int _get_sysfs_get_major_minor(const char *sysfs_dir, const char *kname, int *major, int *minor) static int _get_sysfs_dm_mpath(struct dev_types *dt, const char *sysfs_dir, const char *holder_name)
{ {
char path[PATH_MAX], buffer[64]; char path[PATH_MAX];
char buffer[128];
if (dm_snprintf(path, sizeof(path), "%sblock/%s/dev", sysfs_dir, kname) < 0) { if (dm_snprintf(path, sizeof(path), "%sblock/%s/dm/uuid", sysfs_dir, holder_name) < 0) {
log_error("Sysfs path string is too long."); log_error("Sysfs path string is too long.");
return 0; return 0;
} }
buffer[0] = '\0';
if (!_get_sysfs_string(path, buffer, sizeof(buffer))) if (!_get_sysfs_string(path, buffer, sizeof(buffer)))
return_0; return_0;
if (sscanf(buffer, "%d:%d", major, minor) != 2) { if (!strncmp(buffer, MPATH_PREFIX, 6))
log_error("Failed to parse major minor from %s", buffer);
return 0;
}
return 1; return 1;
return 0;
} }
static int _get_parent_mpath(const char *dir, char *name, int max_size) static int _get_holder_name(const char *dir, char *name, int max_size)
{ {
struct dirent *d; struct dirent *d;
DIR *dr; DIR *dr;
@ -155,7 +164,7 @@ static int _get_parent_mpath(const char *dir, char *name, int max_size)
} }
#ifdef UDEV_SYNC_SUPPORT #ifdef UDEV_SYNC_SUPPORT
static int _udev_dev_is_mpath(struct device *dev) static int _udev_dev_is_mpath_component(struct device *dev)
{ {
const char *value; const char *value;
struct dev_ext *ext; struct dev_ext *ext;
@ -174,95 +183,148 @@ static int _udev_dev_is_mpath(struct device *dev)
return 0; return 0;
} }
#else #else
static int _udev_dev_is_mpath(struct device *dev) static int _udev_dev_is_mpath_component(struct device *dev)
{ {
return 0; return 0;
} }
#endif #endif
static int _native_dev_is_mpath(struct dev_filter *f, struct device *dev) static int _native_dev_is_mpath_component(struct cmd_context *cmd, struct dev_filter *f, struct device *dev)
{ {
struct mpath_priv *mp = (struct mpath_priv *) f->private; struct mpath_priv *mp = (struct mpath_priv *) f->private;
struct dev_types *dt = mp->dt; struct dev_types *dt = mp->dt;
const char *part_name, *name; const char *part_name;
struct stat info; const char *name; /* e.g. "sda" for "/dev/sda" */
char path[PATH_MAX], parent_name[PATH_MAX]; char link_path[PATH_MAX]; /* some obscure, unpredictable sysfs path */
char holders_path[PATH_MAX]; /* e.g. "/sys/block/sda/holders/" */
char dm_dev_path[PATH_MAX]; /* e.g. "/dev/dm-1" */
char holder_name[128] = { 0 }; /* e.g. "dm-1" */
const char *sysfs_dir = dm_sysfs_dir(); const char *sysfs_dir = dm_sysfs_dir();
int major = MAJOR(dev->dev); int dev_major = MAJOR(dev->dev);
int minor = MINOR(dev->dev); int dev_minor = MINOR(dev->dev);
int dm_dev_major;
int dm_dev_minor;
struct stat info;
dev_t primary_dev; dev_t primary_dev;
long look; long look;
/* Limit this filter only to SCSI devices */ /* Limit this filter to SCSI or NVME devices */
if (!major_is_scsi_device(dt, MAJOR(dev->dev))) if (!major_is_scsi_device(dt, dev_major) && !dev_is_nvme(dt, dev))
return 0; return 0;
switch (dev_get_primary_dev(dt, dev, &primary_dev)) { switch (dev_get_primary_dev(dt, dev, &primary_dev)) {
case 2: /* The dev is partition. */ case 2: /* The dev is partition. */
part_name = dev_name(dev); /* name of original dev for log_debug msg */ part_name = dev_name(dev); /* name of original dev for log_debug msg */
if (!(name = _get_sysfs_name_by_devt(sysfs_dir, primary_dev, parent_name, sizeof(parent_name))))
/* gets "foo" for "/dev/foo" where "/dev/foo" comes from major:minor */
if (!(name = _get_sysfs_name_by_devt(sysfs_dir, primary_dev, link_path, sizeof(link_path))))
return_0; return_0;
log_debug_devs("%s: Device is a partition, using primary " log_debug_devs("%s: Device is a partition, using primary "
"device %s for mpath component detection", "device %s for mpath component detection",
part_name, name); part_name, name);
break; break;
case 1: /* The dev is already a primary dev. Just continue with the dev. */ case 1: /* The dev is already a primary dev. Just continue with the dev. */
/* gets "foo" for "/dev/foo" */
if (!(name = _get_sysfs_name(dev))) if (!(name = _get_sysfs_name(dev)))
return_0; return_0;
break; break;
default: /* 0, error. */ default: /* 0, error. */
log_warn("Failed to get primary device for %d:%d.", major, minor); log_warn("Failed to get primary device for %d:%d.", dev_major, dev_minor);
return 0; return 0;
} }
if (dm_snprintf(path, sizeof(path), "%sblock/%s/holders", sysfs_dir, name) < 0) { if (dm_snprintf(holders_path, sizeof(holders_path), "%sblock/%s/holders", sysfs_dir, name) < 0) {
log_warn("Sysfs path to check mpath is too long."); log_warn("Sysfs path to check mpath is too long.");
return 0; return 0;
} }
/* also will filter out partitions */ /* also will filter out partitions */
if (stat(path, &info)) if (stat(holders_path, &info))
return 0; return 0;
if (!S_ISDIR(info.st_mode)) { if (!S_ISDIR(info.st_mode)) {
log_warn("Path %s is not a directory.", path); log_warn("Path %s is not a directory.", holders_path);
return 0; return 0;
} }
if (!_get_parent_mpath(path, parent_name, sizeof(parent_name))) /*
* If holders dir contains an entry such as "dm-1", then this sets
* holder_name to "dm-1".
*
* If holders dir is empty, return 0 (this is generally where
* devs that are not mpath components return.)
*/
if (!_get_holder_name(holders_path, holder_name, sizeof(holder_name)))
return 0; return 0;
if (!_get_sysfs_get_major_minor(sysfs_dir, parent_name, &major, &minor)) if (dm_snprintf(dm_dev_path, sizeof(dm_dev_path), "%s/%s", cmd->dev_dir, holder_name) < 0) {
return_0; log_warn("dm device path to check mpath is too long.");
if (major != dt->device_mapper_major)
return 0; return 0;
}
/* Avoid repeated detection of multipath device and use first checked result */ /*
look = (long) dm_hash_lookup_binary(mp->hash, &minor, sizeof(minor)); * stat "/dev/dm-1" which is the holder of the dev we're checking
* dm_dev_major:dm_dev_minor come from stat("/dev/dm-1")
*/
if (stat(dm_dev_path, &info)) {
log_debug("filter-mpath %s holder %s stat result %d",
dev_name(dev), dm_dev_path, errno);
return 0;
}
dm_dev_major = (int)MAJOR(info.st_rdev);
dm_dev_minor = (int)MINOR(info.st_rdev);
if (dm_dev_major != dt->device_mapper_major) {
log_debug_devs("filter-mpath %s holder %s %d:%d does not have dm major",
dev_name(dev), dm_dev_path, dm_dev_major, dm_dev_minor);
return 0;
}
/*
* Save the result of checking that "/dev/dm-1" is an mpath device
* to avoid repeating it for each path component.
* The minor number of "/dev/dm-1" is added to the hash table with
* const value 2 meaning that dm minor 1 (for /dev/dm-1) is a multipath dev
* and const value 1 meaning that dm minor 1 is not a multipath dev.
*/
look = (long) dm_hash_lookup_binary(mp->hash, &dm_dev_minor, sizeof(dm_dev_minor));
if (look > 0) { if (look > 0) {
log_debug_devs("%s(%u:%u): already checked as %sbeing mpath.", log_debug_devs("filter-mpath %s holder %s %u:%u already checked as %sbeing mpath.",
parent_name, major, minor, (look > 1) ? "" : "not "); dev_name(dev), holder_name, dm_dev_major, dm_dev_minor, (look > 1) ? "" : "not ");
return (look > 1) ? 1 : 0; return (look > 1) ? 1 : 0;
} }
if (lvm_dm_prefix_check(major, minor, MPATH_PREFIX)) { /*
(void) dm_hash_insert_binary(mp->hash, &minor, sizeof(minor), (void*)2); * Returns 1 if /sys/block/<holder_name>/dm/uuid indicates that
* <holder_name> is a dm device with dm uuid prefix mpath-.
* When true, <holder_name> will be something like "dm-1".
*
* (Is a hash table worth it to avoid reading one sysfs file?)
*/
if (_get_sysfs_dm_mpath(dt, sysfs_dir, holder_name)) {
log_debug_devs("filter-mpath %s holder %s %u:%u ignore mpath component",
dev_name(dev), holder_name, dm_dev_major, dm_dev_minor);
(void) dm_hash_insert_binary(mp->hash, &dm_dev_minor, sizeof(dm_dev_minor), (void*)2);
return 1; return 1;
} }
(void) dm_hash_insert_binary(mp->hash, &minor, sizeof(minor), (void*)1); (void) dm_hash_insert_binary(mp->hash, &dm_dev_minor, sizeof(dm_dev_minor), (void*)1);
return 0; return 0;
} }
static int _dev_is_mpath(struct dev_filter *f, struct device *dev) static int _dev_is_mpath_component(struct cmd_context *cmd, struct dev_filter *f, struct device *dev)
{ {
if (dev->ext.src == DEV_EXT_NONE) if (dev->ext.src == DEV_EXT_NONE)
return _native_dev_is_mpath(f, dev); return _native_dev_is_mpath_component(cmd, f, dev);
if (dev->ext.src == DEV_EXT_UDEV) if (dev->ext.src == DEV_EXT_UDEV)
return _udev_dev_is_mpath(dev); return _udev_dev_is_mpath_component(dev);
log_error(INTERNAL_ERROR "Missing hook for mpath recognition " log_error(INTERNAL_ERROR "Missing hook for mpath recognition "
"using external device info source %s", dev_ext_name(dev)); "using external device info source %s", dev_ext_name(dev));
@ -272,11 +334,11 @@ static int _dev_is_mpath(struct dev_filter *f, struct device *dev)
#define MSG_SKIPPING "%s: Skipping mpath component device" #define MSG_SKIPPING "%s: Skipping mpath component device"
static int _ignore_mpath(struct cmd_context *cmd, struct dev_filter *f, struct device *dev, const char *use_filter_name) static int _ignore_mpath_component(struct cmd_context *cmd, struct dev_filter *f, struct device *dev, const char *use_filter_name)
{ {
dev->filtered_flags &= ~DEV_FILTERED_MPATH_COMPONENT; dev->filtered_flags &= ~DEV_FILTERED_MPATH_COMPONENT;
if (_dev_is_mpath(f, dev) == 1) { if (_dev_is_mpath_component(cmd, f, dev) == 1) {
if (dev->ext.src == DEV_EXT_NONE) if (dev->ext.src == DEV_EXT_NONE)
log_debug_devs(MSG_SKIPPING, dev_name(dev)); log_debug_devs(MSG_SKIPPING, dev_name(dev));
else else
@ -303,8 +365,8 @@ static void _destroy(struct dev_filter *f)
struct dev_filter *mpath_filter_create(struct dev_types *dt) struct dev_filter *mpath_filter_create(struct dev_types *dt)
{ {
const char *sysfs_dir = dm_sysfs_dir(); const char *sysfs_dir = dm_sysfs_dir();
struct dm_pool *mem;
struct mpath_priv *mp; struct mpath_priv *mp;
struct dm_pool *mem;
struct dm_hash_table *hash; struct dm_hash_table *hash;
if (!*sysfs_dir) { if (!*sysfs_dir) {
@ -328,19 +390,13 @@ struct dev_filter *mpath_filter_create(struct dev_types *dt)
goto bad; goto bad;
} }
if (!(mp = dm_pool_zalloc(mem, sizeof(*mp)))) { mp->f.passes_filter = _ignore_mpath_component;
log_error("mpath filter allocation failed.");
goto bad;
}
mp->f.passes_filter = _ignore_mpath;
mp->f.destroy = _destroy; mp->f.destroy = _destroy;
mp->f.use_count = 0; mp->f.use_count = 0;
mp->f.private = mp; mp->f.private = mp;
mp->f.name = "mpath"; mp->f.name = "mpath";
mp->mem = mem;
mp->dt = dt; mp->dt = dt;
mp->mem = mem;
mp->hash = hash; mp->hash = hash;
log_debug_devs("mpath filter initialised."); log_debug_devs("mpath filter initialised.");