mirror of
git://sourceware.org/git/lvm2.git
synced 2024-12-25 01:34:38 +03:00
9048565093
related to config settings: obtain_device_list_from_udev (controls if lvm gets a list of devices from readdir /dev or from libudev) external_device_info_source (controls if lvm asks libudev for device information) . Make the obtain_device_list_from_udev setting affect only the choice of readdir /dev vs libudev. The setting no longer controls if udev is used for device type checks. . Change obtain_device_list_from_udev default to 0. This helps avoid boot timeouts due to slow libudev queries, avoids reported failures from udev_enumerate_scan_devices, and avoids delays from "device not initialized in udev database" errors. Even without errors, for a system booting with 1024 PVs, lvm2-pvscan times improve from about 100 sec to 15 sec, and the pvscan command from about 64 sec to about 4 sec. . For external_device_info_source="none", remove all libudev device info queries, and use only lvm native device info. . For external_device_info_source="udev", first check lvm native device info, then check libudev info. . Remove sleep/retry loop when attempting libudev queries for device info. udev info will simply be skipped if it's not immediately available. . Only set up a libudev connection if it will be used by obtain_device_list_from_udev/external_device_info_source. . For native multipath component detection, use /etc/multipath/wwids. If a device has a wwid matching an entry in the wwids file, then it's considered a multipath component. This is necessary to natively detect multipath components when the mpath device is not set up.
543 lines
13 KiB
C
543 lines
13 KiB
C
/*
|
|
* Copyright (C) 2004 Luca Berra
|
|
* Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
|
|
*
|
|
* This file is part of LVM2.
|
|
*
|
|
* This copyrighted material is made available to anyone wishing to use,
|
|
* modify, copy, or redistribute it subject to the terms and conditions
|
|
* of the GNU Lesser General Public License v.2.1.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public License
|
|
* along with this program; if not, write to the Free Software Foundation,
|
|
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
|
|
#include "lib/misc/lib.h"
|
|
#include "lib/device/dev-type.h"
|
|
#include "lib/mm/xlate.h"
|
|
#include "lib/misc/crc.h"
|
|
#include "lib/commands/toolcontext.h"
|
|
#ifdef UDEV_SYNC_SUPPORT
|
|
#include <libudev.h> /* for MD detection using udev db records */
|
|
#include "lib/device/dev-ext-udev-constants.h"
|
|
#endif
|
|
|
|
#ifdef __linux__
|
|
|
|
/* Lifted from <linux/raid/md_p.h> because of difficulty including it */

/* Magic number compared against the first 32 bits of an md superblock. */
#define MD_SB_MAGIC 0xa92b4efc

/* 64KB, expressed in bytes and in 512-byte sectors. */
#define MD_RESERVED_BYTES (64 * 1024ULL)
#define MD_RESERVED_SECTORS (MD_RESERVED_BYTES / 512)

/*
 * Round x (a size in sectors) down to a multiple of MD_RESERVED_SECTORS,
 * then step back by one more MD_RESERVED_SECTORS.
 */
#define MD_NEW_SIZE_SECTORS(x) \
	(((x) & ~(MD_RESERVED_SECTORS - 1)) - MD_RESERVED_SECTORS)

/* Size of the buffers that hold a value read from an md sysfs attribute. */
#define MD_MAX_SYSFS_SIZE 64
|
|
|
|
static int _dev_has_md_magic(struct device *dev, uint64_t sb_offset)
|
|
{
|
|
uint32_t md_magic;
|
|
|
|
/* Version 1 is little endian; version 0.90.0 is machine endian */
|
|
|
|
if (!dev_read_bytes(dev, sb_offset, sizeof(uint32_t), &md_magic))
|
|
return_0;
|
|
|
|
if ((md_magic == MD_SB_MAGIC) ||
|
|
((MD_SB_MAGIC != xlate32(MD_SB_MAGIC)) && (md_magic == xlate32(MD_SB_MAGIC))))
|
|
return 1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
#define IMSM_SIGNATURE "Intel Raid ISM Cfg Sig. "
|
|
#define IMSM_SIG_LEN (sizeof(IMSM_SIGNATURE) - 1)
|
|
|
|
static int _dev_has_imsm_magic(struct device *dev, uint64_t devsize_sectors)
|
|
{
|
|
char imsm_signature[IMSM_SIG_LEN];
|
|
uint64_t off;
|
|
unsigned int physical_block_size = 0;
|
|
unsigned int logical_block_size = 0;
|
|
|
|
if (!dev_get_direct_block_sizes(dev, &physical_block_size, &logical_block_size))
|
|
return_0;
|
|
|
|
if (logical_block_size == 4096)
|
|
off = (devsize_sectors * 512) - 8192;
|
|
else
|
|
off = (devsize_sectors * 512) - 1024;
|
|
|
|
if (!dev_read_bytes(dev, off, IMSM_SIG_LEN, imsm_signature))
|
|
return_0;
|
|
|
|
if (!memcmp(imsm_signature, IMSM_SIGNATURE, IMSM_SIG_LEN))
|
|
return 1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
#define DDF_MAGIC 0xDE11DE11
|
|
struct ddf_header {
|
|
uint32_t magic;
|
|
uint32_t crc;
|
|
char guid[24];
|
|
char revision[8];
|
|
char padding[472];
|
|
};
|
|
|
|
static int _dev_has_ddf_magic(struct device *dev, uint64_t devsize_sectors, uint64_t *sb_offset)
|
|
{
|
|
struct ddf_header hdr;
|
|
uint32_t crc, our_crc;
|
|
uint64_t off;
|
|
uint64_t devsize_bytes = devsize_sectors * 512;
|
|
|
|
if (devsize_bytes < 0x30000)
|
|
return 0;
|
|
|
|
/* 512 bytes before the end of device (from libblkid) */
|
|
off = ((devsize_bytes / 0x200) - 1) * 0x200;
|
|
|
|
if (!dev_read_bytes(dev, off, 512, &hdr))
|
|
return_0;
|
|
|
|
if ((hdr.magic == cpu_to_be32(DDF_MAGIC)) ||
|
|
(hdr.magic == cpu_to_le32(DDF_MAGIC))) {
|
|
crc = hdr.crc;
|
|
hdr.crc = 0xffffffff;
|
|
our_crc = calc_crc(0, (const uint8_t *)&hdr, 512);
|
|
|
|
if ((cpu_to_be32(our_crc) == crc) ||
|
|
(cpu_to_le32(our_crc) == crc)) {
|
|
*sb_offset = off;
|
|
return 1;
|
|
} else {
|
|
log_debug_devs("Found md ddf magic at %llu wrong crc %x disk %x %s",
|
|
(unsigned long long)off, our_crc, crc, dev_name(dev));
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
/* 128KB before the end of device (from libblkid) */
|
|
off = ((devsize_bytes / 0x200) - 257) * 0x200;
|
|
|
|
if (!dev_read_bytes(dev, off, 512, &hdr))
|
|
return_0;
|
|
|
|
if ((hdr.magic == cpu_to_be32(DDF_MAGIC)) ||
|
|
(hdr.magic == cpu_to_le32(DDF_MAGIC))) {
|
|
crc = hdr.crc;
|
|
hdr.crc = 0xffffffff;
|
|
our_crc = calc_crc(0, (const uint8_t *)&hdr, 512);
|
|
|
|
if ((cpu_to_be32(our_crc) == crc) ||
|
|
(cpu_to_le32(our_crc) == crc)) {
|
|
*sb_offset = off;
|
|
return 1;
|
|
} else {
|
|
log_debug_devs("Found md ddf magic at %llu wrong crc %x disk %x %s",
|
|
(unsigned long long)off, our_crc, crc, dev_name(dev));
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Ask the udev record of dev whether blkid identified it as a software
 * raid member.
 *
 * external_device_info_source="udev" enables these udev checks.
 * external_device_info_source="none" disables them.
 *
 * Returns 1 when the DEV_EXT_UDEV_BLKID_TYPE property equals
 * DEV_EXT_UDEV_BLKID_TYPE_SW_RAID, otherwise 0 (including when no
 * external handle or no such property is available).  Always returns 0
 * when built without UDEV_SYNC_SUPPORT.
 */
static int _dev_is_md_component_udev(struct device *dev)
{
#ifdef UDEV_SYNC_SUPPORT
	struct dev_ext *udev_ext = dev_ext_get(dev);
	const char *blkid_type;

	if (!udev_ext)
		return_0;

	blkid_type = udev_device_get_property_value((struct udev_device *)udev_ext->handle,
						    DEV_EXT_UDEV_BLKID_TYPE);
	if (!blkid_type)
		return 0;

	return strcmp(blkid_type, DEV_EXT_UDEV_BLKID_TYPE_SW_RAID) ? 0 : 1;
#else
	return 0;
#endif
}
|
|
|
|
/*
|
|
* Returns -1 on error
|
|
*/
|
|
static int _dev_is_md_component_native(struct device *dev, uint64_t *offset_found, int full)
|
|
{
|
|
uint64_t size, sb_offset = 0;
|
|
int ret;
|
|
|
|
/* i/o layer has not been set up */
|
|
if (!scan_bcache) {
|
|
log_error(INTERNAL_ERROR "dev_is_md_component_native requires io layer.");
|
|
return -1;
|
|
}
|
|
|
|
if (!dev_get_size(dev, &size)) {
|
|
stack;
|
|
return -1;
|
|
}
|
|
|
|
if (size < MD_RESERVED_SECTORS * 2)
|
|
return 0;
|
|
|
|
/*
|
|
* Some md versions locate the magic number at the end of the device.
|
|
* Those checks can't be satisfied with the initial scan data, and
|
|
* require an extra read i/o at the end of every device. Issuing
|
|
* an extra read to every device in every command, just to check for
|
|
* the old md format is a bad tradeoff.
|
|
*
|
|
* When "full" is set, we check a the start and end of the device for
|
|
* md magic numbers. When "full" is not set, we only check at the
|
|
* start of the device for the magic numbers. We decide for each
|
|
* command if it should do a full check (cmd->use_full_md_check),
|
|
* and set it for commands that could possibly write to an md dev
|
|
* (pvcreate/vgcreate/vgextend).
|
|
*/
|
|
|
|
/*
|
|
* md superblock version 1.1 at offset 0 from start
|
|
*/
|
|
|
|
if (_dev_has_md_magic(dev, 0)) {
|
|
log_debug_devs("Found md magic number at offset 0 of %s.", dev_name(dev));
|
|
ret = 1;
|
|
goto out;
|
|
}
|
|
|
|
/*
|
|
* md superblock version 1.2 at offset 4KB from start
|
|
*/
|
|
|
|
if (_dev_has_md_magic(dev, 4096)) {
|
|
log_debug_devs("Found md magic number at offset 4096 of %s.", dev_name(dev));
|
|
ret = 1;
|
|
goto out;
|
|
}
|
|
|
|
if (!full) {
|
|
ret = 0;
|
|
goto out;
|
|
}
|
|
|
|
/*
|
|
* Handle superblocks at the end of the device.
|
|
*/
|
|
|
|
/*
|
|
* md superblock version 0 at 64KB from end of device
|
|
* (after end is aligned to 64KB)
|
|
*/
|
|
|
|
sb_offset = MD_NEW_SIZE_SECTORS(size) << SECTOR_SHIFT;
|
|
|
|
if (_dev_has_md_magic(dev, sb_offset)) {
|
|
log_debug_devs("Found md magic number at offset %llu of %s.", (unsigned long long)sb_offset, dev_name(dev));
|
|
ret = 1;
|
|
goto out;
|
|
}
|
|
|
|
/*
|
|
* md superblock version 1.0 at 8KB from end of device
|
|
*/
|
|
|
|
sb_offset = ((size - 8 * 2) & ~(4 * 2 - 1ULL)) << SECTOR_SHIFT;
|
|
|
|
if (_dev_has_md_magic(dev, sb_offset)) {
|
|
log_debug_devs("Found md magic number at offset %llu of %s.", (unsigned long long)sb_offset, dev_name(dev));
|
|
ret = 1;
|
|
goto out;
|
|
}
|
|
|
|
/*
|
|
* md imsm superblock 1K from end of device
|
|
*/
|
|
|
|
if (_dev_has_imsm_magic(dev, size)) {
|
|
log_debug_devs("Found md imsm magic number at offset %llu of %s.", (unsigned long long)sb_offset, dev_name(dev));
|
|
sb_offset = 1024;
|
|
ret = 1;
|
|
goto out;
|
|
}
|
|
|
|
/*
|
|
* md ddf superblock 512 bytes from end, or 128KB from end
|
|
*/
|
|
|
|
if (_dev_has_ddf_magic(dev, size, &sb_offset)) {
|
|
log_debug_devs("Found md ddf magic number at offset %llu of %s.", (unsigned long long)sb_offset, dev_name(dev));
|
|
ret = 1;
|
|
goto out;
|
|
}
|
|
|
|
ret = 0;
|
|
out:
|
|
if (ret && offset_found)
|
|
*offset_found = sb_offset;
|
|
|
|
return ret;
|
|
}
|
|
|
|
int dev_is_md_component(struct cmd_context *cmd, struct device *dev, uint64_t *offset_found, int full)
|
|
{
|
|
if (_dev_is_md_component_native(dev, offset_found, full) == 1)
|
|
goto found;
|
|
|
|
if (external_device_info_source() == DEV_EXT_UDEV) {
|
|
if (_dev_is_md_component_udev(dev) == 1)
|
|
goto found;
|
|
}
|
|
return 0;
|
|
|
|
found:
|
|
dev->flags |= DEV_IS_MD_COMPONENT;
|
|
return 1;
|
|
}
|
|
|
|
static int _md_sysfs_attribute_snprintf(char *path, size_t size,
|
|
struct dev_types *dt,
|
|
struct device *blkdev,
|
|
const char *attribute)
|
|
{
|
|
const char *sysfs_dir = dm_sysfs_dir();
|
|
struct stat info;
|
|
dev_t dev = blkdev->dev;
|
|
int ret = -1;
|
|
|
|
if (!sysfs_dir || !*sysfs_dir)
|
|
return ret;
|
|
|
|
if (MAJOR(dev) == dt->blkext_major) {
|
|
/* lookup parent MD device from blkext partition */
|
|
if (!dev_get_primary_dev(dt, blkdev, &dev))
|
|
return ret;
|
|
}
|
|
|
|
if (MAJOR(dev) != dt->md_major)
|
|
return ret;
|
|
|
|
ret = dm_snprintf(path, size, "%s/dev/block/%d:%d/md/%s", sysfs_dir,
|
|
(int)MAJOR(dev), (int)MINOR(dev), attribute);
|
|
if (ret < 0) {
|
|
log_error("dm_snprintf md %s failed", attribute);
|
|
return ret;
|
|
}
|
|
|
|
if (stat(path, &info) == -1) {
|
|
if (errno != ENOENT) {
|
|
log_sys_error("stat", path);
|
|
return ret;
|
|
}
|
|
/* old sysfs structure */
|
|
ret = dm_snprintf(path, size, "%s/block/md%d/md/%s",
|
|
sysfs_dir, (int)MINOR(dev), attribute);
|
|
if (ret < 0) {
|
|
log_error("dm_snprintf old md %s failed", attribute);
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
static int _md_sysfs_attribute_scanf(struct dev_types *dt,
|
|
struct device *dev,
|
|
const char *attribute_name,
|
|
const char *attribute_fmt,
|
|
void *attribute_value)
|
|
{
|
|
char path[PATH_MAX+1], buffer[MD_MAX_SYSFS_SIZE];
|
|
FILE *fp;
|
|
int ret = 0;
|
|
|
|
if (_md_sysfs_attribute_snprintf(path, PATH_MAX, dt,
|
|
dev, attribute_name) < 0)
|
|
return ret;
|
|
|
|
if (!(fp = fopen(path, "r"))) {
|
|
log_debug("_md_sysfs_attribute_scanf fopen failed %s", path);
|
|
return ret;
|
|
}
|
|
|
|
if (!fgets(buffer, sizeof(buffer), fp)) {
|
|
log_debug("_md_sysfs_attribute_scanf fgets failed %s", path);
|
|
goto out;
|
|
}
|
|
|
|
if ((ret = sscanf(buffer, attribute_fmt, attribute_value)) != 1) {
|
|
log_error("%s sysfs attr %s not in expected format: %s",
|
|
dev_name(dev), attribute_name, buffer);
|
|
goto out;
|
|
}
|
|
|
|
out:
|
|
if (fclose(fp))
|
|
log_sys_error("fclose", path);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* Retrieve chunk size from md device using sysfs.
|
|
*/
|
|
static unsigned long _dev_md_chunk_size(struct dev_types *dt, struct device *dev)
|
|
{
|
|
const char *attribute = "chunk_size";
|
|
unsigned long chunk_size_bytes = 0UL;
|
|
|
|
if (_md_sysfs_attribute_scanf(dt, dev, attribute,
|
|
"%lu", &chunk_size_bytes) != 1)
|
|
return 0;
|
|
|
|
log_very_verbose("Device %s %s is %lu bytes.",
|
|
dev_name(dev), attribute, chunk_size_bytes);
|
|
|
|
return chunk_size_bytes >> SECTOR_SHIFT;
|
|
}
|
|
|
|
/*
|
|
* Retrieve level from md device using sysfs.
|
|
*/
|
|
static int _dev_md_level(struct dev_types *dt, struct device *dev)
|
|
{
|
|
char level_string[MD_MAX_SYSFS_SIZE];
|
|
const char *attribute = "level";
|
|
int level = -1;
|
|
|
|
if (_md_sysfs_attribute_scanf(dt, dev, attribute,
|
|
"%s", &level_string) != 1)
|
|
return -1;
|
|
|
|
log_very_verbose("Device %s %s is %s.",
|
|
dev_name(dev), attribute, level_string);
|
|
|
|
/* We only care about raid - ignore linear/faulty/multipath etc. */
|
|
if (sscanf(level_string, "raid%d", &level) != 1)
|
|
return -1;
|
|
|
|
return level;
|
|
}
|
|
|
|
/*
 * Retrieve raid_disks from md device using sysfs.
 *
 * Returns the integer read from the "raid_disks" attribute, or 0 when
 * the attribute cannot be read or parsed.
 */
static int _dev_md_raid_disks(struct dev_types *dt, struct device *dev)
{
	static const char attr_name[] = "raid_disks";
	int nr_disks = 0;
	int nr_parsed;

	nr_parsed = _md_sysfs_attribute_scanf(dt, dev, attr_name, "%d", &nr_disks);
	if (nr_parsed != 1)
		return 0;

	log_very_verbose("Device %s %s is %d.",
			 dev_name(dev), attr_name, nr_disks);

	return nr_disks;
}
|
|
|
|
/*
|
|
* Calculate stripe width of md device using its sysfs files.
|
|
*/
|
|
unsigned long dev_md_stripe_width(struct dev_types *dt, struct device *dev)
|
|
{
|
|
unsigned long chunk_size_sectors = 0UL;
|
|
unsigned long stripe_width_sectors = 0UL;
|
|
int level, raid_disks, data_disks;
|
|
|
|
chunk_size_sectors = _dev_md_chunk_size(dt, dev);
|
|
if (!chunk_size_sectors)
|
|
return 0;
|
|
|
|
level = _dev_md_level(dt, dev);
|
|
if (level < 0)
|
|
return 0;
|
|
|
|
raid_disks = _dev_md_raid_disks(dt, dev);
|
|
if (!raid_disks)
|
|
return 0;
|
|
|
|
/* The raid level governs the number of data disks. */
|
|
switch (level) {
|
|
case 0:
|
|
/* striped md does not have any parity disks */
|
|
data_disks = raid_disks;
|
|
break;
|
|
case 1:
|
|
case 10:
|
|
/* mirrored md effectively has 1 data disk */
|
|
data_disks = 1;
|
|
break;
|
|
case 4:
|
|
case 5:
|
|
/* both raid 4 and 5 have a single parity disk */
|
|
data_disks = raid_disks - 1;
|
|
break;
|
|
case 6:
|
|
/* raid 6 has 2 parity disks */
|
|
data_disks = raid_disks - 2;
|
|
break;
|
|
default:
|
|
log_error("Device %s has an unknown md raid level: %d",
|
|
dev_name(dev), level);
|
|
return 0;
|
|
}
|
|
|
|
stripe_width_sectors = chunk_size_sectors * data_disks;
|
|
|
|
log_very_verbose("Device %s stripe-width is %lu bytes.",
|
|
dev_name(dev),
|
|
stripe_width_sectors << SECTOR_SHIFT);
|
|
|
|
return stripe_width_sectors;
|
|
}
|
|
|
|
int dev_is_md_with_end_superblock(struct dev_types *dt, struct device *dev)
|
|
{
|
|
char version_string[MD_MAX_SYSFS_SIZE];
|
|
const char *attribute = "metadata_version";
|
|
|
|
if (MAJOR(dev->dev) != dt->md_major)
|
|
return 0;
|
|
|
|
if (_md_sysfs_attribute_scanf(dt, dev, attribute,
|
|
"%s", &version_string) != 1)
|
|
return 0;
|
|
|
|
log_very_verbose("Device %s %s is %s.",
|
|
dev_name(dev), attribute, version_string);
|
|
|
|
if (!strcmp(version_string, "1.0") || !strcmp(version_string, "0.90"))
|
|
return 1;
|
|
return 0;
|
|
}
|
|
|
|
#else
|
|
|
|
/*
 * Stub used when not building for Linux: md detection is not performed
 * and no device is reported as an md component.
 *
 * The parameter list matches the Linux definition in this file,
 * including the trailing "full" argument, so that both variants satisfy
 * the same prototype and the same callers.
 */
int dev_is_md_component(struct cmd_context *cmd __attribute__((unused)),
			struct device *dev __attribute__((unused)),
			uint64_t *sb __attribute__((unused)),
			int full __attribute__((unused)))
{
	return 0;
}
|
|
|
|
/*
 * Stub used when not building for Linux: no md sysfs information is
 * read, so the stripe width is always reported as 0.
 */
unsigned long dev_md_stripe_width(struct dev_types *dt __attribute__((unused)),
				  struct device *dev __attribute__((unused)))
{
	const unsigned long no_stripe_width = 0UL;

	return no_stripe_width;
}
|
|
|
|
#endif
|