1
0
mirror of git://sourceware.org/git/lvm2.git synced 2025-01-04 09:18:36 +03:00
lvm2/tools/pvscan.c
David Teigland 25b58310e3 pvscan: avoid full scan for activation
When an online PV completed a VG, the standard
activation functions were used to activate the VG.
These functions use a full scan of all devs.
When many pvscans are run during startup and need
to activate many VGs, scanning all devs from all
the pvscans can take a long time.

Optimize VG activation in pvscan to scan only the
devs in the VG being activated.  This makes use of
the online file info that was used to determine
the VG was complete.

The downside of this approach is that pvscan activation
will not detect duplicate PVs and block activation,
where a normal activation command (which scans all
devices) would.
2019-09-03 10:11:16 -05:00

1481 lines
41 KiB
C

/*
* Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004-2012 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License v.2.1.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "tools.h"
#include "lib/cache/lvmcache.h"
#include "lib/metadata/metadata.h"
#include "lib/label/hints.h"
#include <dirent.h>
#include <sys/file.h>
/*
 * State carried across PVs by the pvscan display command: running totals
 * for the summary line plus the column layout used for each PV line.
 */
struct pvscan_params {
/* Number of counted PVs that are orphans (in no VG). */
int new_pvs_found;
/* Number of PVs counted (after the exported/no-VG option filters). */
int pvs_found;
/* Sum of the sizes of all counted PVs. */
uint64_t size_total;
/* Sum of the sizes of the counted orphan PVs only. */
uint64_t size_new;
/* Column width for PV names; zero means the layout is not yet set up. */
unsigned pv_max_name_len;
/* Column width for VG names. */
unsigned vg_max_name_len;
/* Size in bytes of the pv_tmp_name buffer. */
unsigned pv_tmp_namelen;
/* Pool-allocated scratch buffer for the "<name> with UUID <uuid>" text. */
char *pv_tmp_name;
};
/* Result state shared by the autoactivation paths. */
struct pvscan_aa_params {
/* Incremented each time vgchange_activate() fails for a VG. */
unsigned int activate_errors;
};
/*
 * Directories under DEFAULT_RUN_DIR used by this file:
 * pvs_online holds one file per online PV, named by PVID;
 * vgs_online holds one file per VG, named by VG name, created by the
 * pvscan that claims autoactivation of that VG.
 */
static const char *_pvs_online_dir = DEFAULT_RUN_DIR "/pvs_online";
static const char *_vgs_online_dir = DEFAULT_RUN_DIR "/vgs_online";
static int _pvscan_display_pv(struct cmd_context *cmd,
struct physical_volume *pv,
struct pvscan_params *params)
{
/* XXXXXX-XXXX-XXXX-XXXX-XXXX-XXXX-XXXXXX */
char uuid[40] __attribute__((aligned(8)));
const unsigned suffix_len = sizeof(uuid) + 10;
unsigned pv_len;
const char *pvdevname = pv_dev_name(pv);
/* short listing? */
if (arg_is_set(cmd, short_ARG)) {
log_print_unless_silent("%s", pvdevname);
return ECMD_PROCESSED;
}
if (!params->pv_max_name_len) {
lvmcache_get_max_name_lengths(cmd, &params->pv_max_name_len, &params->vg_max_name_len);
params->pv_max_name_len += 2;
params->vg_max_name_len += 2;
params->pv_tmp_namelen = params->pv_max_name_len + suffix_len;
if (!(params->pv_tmp_name = dm_pool_alloc(cmd->mem, params->pv_tmp_namelen)))
return ECMD_FAILED;
}
pv_len = params->pv_max_name_len;
memset(params->pv_tmp_name, 0, params->pv_tmp_namelen);
if (arg_is_set(cmd, uuid_ARG)) {
if (!id_write_format(&pv->id, uuid, sizeof(uuid))) {
stack;
return ECMD_FAILED;
}
if (dm_snprintf(params->pv_tmp_name, params->pv_tmp_namelen, "%-*s with UUID %s",
params->pv_max_name_len - 2, pvdevname, uuid) < 0) {
log_error("Invalid PV name with uuid.");
return ECMD_FAILED;
}
pvdevname = params->pv_tmp_name;
pv_len += suffix_len;
}
if (is_orphan(pv))
log_print_unless_silent("PV %-*s %-*s %s [%s]",
pv_len, pvdevname,
params->vg_max_name_len, " ",
pv->fmt ? pv->fmt->name : " ",
display_size(cmd, pv_size(pv)));
else if (pv_status(pv) & EXPORTED_VG)
log_print_unless_silent("PV %-*s is in exported VG %s [%s / %s free]",
pv_len, pvdevname, pv_vg_name(pv),
display_size(cmd, (uint64_t) pv_pe_count(pv) * pv_pe_size(pv)),
display_size(cmd, (uint64_t) (pv_pe_count(pv) - pv_pe_alloc_count(pv)) * pv_pe_size(pv)));
else
log_print_unless_silent("PV %-*s VG %-*s %s [%s / %s free]",
pv_len, pvdevname,
params->vg_max_name_len, pv_vg_name(pv),
pv->fmt ? pv->fmt->name : " ",
display_size(cmd, (uint64_t) pv_pe_count(pv) * pv_pe_size(pv)),
display_size(cmd, (uint64_t) (pv_pe_count(pv) - pv_pe_alloc_count(pv)) * pv_pe_size(pv)));
return ECMD_PROCESSED;
}
/*
 * process_each_pv callback: apply the exported / no-VG option filters,
 * add the PV to the running totals held in handle->custom_handle, and
 * print its line.  Always returns ECMD_PROCESSED.
 */
static int _pvscan_display_single(struct cmd_context *cmd, struct volume_group *vg,
				  struct physical_volume *pv, struct processing_handle *handle)
{
	struct pvscan_params *totals = (struct pvscan_params *)handle->custom_handle;

	/* Exported-only listing: skip PVs whose VG is not exported. */
	if (arg_is_set(cmd, exported_ARG) && !(pv_status(pv) & EXPORTED_VG))
		return ECMD_PROCESSED;

	/* No-VG listing: skip PVs that belong to a VG. */
	if (arg_is_set(cmd, novolumegroup_ARG) && !is_orphan(pv))
		return ECMD_PROCESSED;

	totals->pvs_found++;

	if (!is_orphan(pv)) {
		totals->size_total += (uint64_t) pv_pe_count(pv) * pv_pe_size(pv);
	} else {
		totals->new_pvs_found++;
		totals->size_new += pv_size(pv);
		totals->size_total += pv_size(pv);
	}

	_pvscan_display_pv(cmd, pv, totals);

	return ECMD_PROCESSED;
}
int pvscan_display_cmd(struct cmd_context *cmd, int argc, char **argv)
{
struct pvscan_params params = { 0 };
struct processing_handle *handle = NULL;
int ret;
if (arg_is_set(cmd, novolumegroup_ARG) && arg_is_set(cmd, exported_ARG)) {
log_error("Options -e and -n are incompatible");
return EINVALID_CMD_LINE;
}
if (arg_is_set(cmd, exported_ARG) || arg_is_set(cmd, novolumegroup_ARG))
log_warn("WARNING: only considering physical volumes %s",
arg_is_set(cmd, exported_ARG) ?
"of exported volume group(s)" : "in no volume group");
if (!(handle = init_processing_handle(cmd, NULL))) {
log_error("Failed to initialize processing handle.");
ret = ECMD_FAILED;
goto out;
}
handle->custom_handle = &params;
ret = process_each_pv(cmd, argc, argv, NULL, 0, 0, handle, _pvscan_display_single);
if (!params.pvs_found)
log_print_unless_silent("No matching physical volumes found");
else
log_print_unless_silent("Total: %d [%s] / in use: %d [%s] / in no VG: %d [%s]",
params.pvs_found,
display_size(cmd, params.size_total),
params.pvs_found - params.new_pvs_found,
display_size(cmd, (params.size_total - params.size_new)),
params.new_pvs_found, display_size(cmd, params.size_new));
out:
destroy_processing_handle(cmd, handle);
return ret;
}
/*
 * Locate the VG name inside the text of a pv online file.
 *
 * The file contains:
 *   <major>:<minor>\n
 *   vg:<vgname>\n\0
 *
 * The newline that ends the vg line (if any) is overwritten with a NUL in
 * buf, and a pointer into buf at the start of the name is returned.
 * Returns NULL when there is no second line or it does not start with "vg:".
 */
static char *_vgname_in_pvid_file_buf(char *buf)
{
	char *second_line;
	char *eol;

	second_line = strchr(buf, '\n');
	if (!second_line)
		return NULL;

	second_line++; /* step over the newline */

	if (strncmp(second_line, "vg:", 3) != 0)
		return NULL;

	eol = strchr(second_line, '\n');
	if (eol)
		*eol = '\0';

	return second_line + 3;
}
#define MAX_PVID_FILE_SIZE 512
/*
 * Read a pv online file.
 *
 * path:   file to read.
 * major,
 * minor:  set to the device numbers found on the first line.
 * vgname: caller's buffer of at least NAME_LEN bytes; receives the VG name
 *         from the "vg:" line when one is present, always NUL-terminated.
 *         It is left untouched when the file has no VG name.
 *
 * Returns 1 when the device numbers were read (a missing VG name is not an
 * error), 0 when the file cannot be opened, is empty, or has no device
 * numbers.
 */
static int _online_pvid_file_read(char *path, int *major, int *minor, char *vgname)
{
	char buf[MAX_PVID_FILE_SIZE];
	char *name;
	int fd, rv;

	fd = open(path, O_RDONLY);
	if (fd < 0) {
		log_warn("Failed to open %s", path);
		return 0;
	}

	memset(buf, 0, sizeof(buf));

	/*
	 * Read at most sizeof(buf) - 1 bytes so that the zeroed last byte
	 * keeps buf NUL-terminated for sscanf() and strchr() below, even
	 * if the file is larger than expected.
	 */
	rv = read(fd, buf, sizeof(buf) - 1);

	if (close(fd))
		log_sys_debug("close", path);

	if (rv <= 0) {
		log_warn("No info in %s", path);
		return 0;
	}

	if (sscanf(buf, "%d:%d", major, minor) != 2) {
		log_warn("No device numbers in %s", path);
		return 0;
	}

	/* name points to an offset in buf */
	if ((name = _vgname_in_pvid_file_buf(buf))) {
		/* strncpy does not terminate when the source fills the limit. */
		strncpy(vgname, name, NAME_LEN - 1);
		vgname[NAME_LEN - 1] = '\0';
	} else
		log_debug("No vgname in %s", path);

	return 1;
}
/*
 * Remove the vg online file for vgname.  This is done when a PV of the
 * VG goes offline, even if other PVs of that VG are still online, so
 * that the VG gets activated again once it becomes complete.
 */
static void _online_vg_file_remove(const char *vgname)
{
	char vg_path[PATH_MAX];
	int n;

	n = dm_snprintf(vg_path, sizeof(vg_path), "%s/%s", _vgs_online_dir, vgname);
	if (n < 0) {
		log_error("Path %s/%s is too long.", _vgs_online_dir, vgname);
		return;
	}

	log_debug("Unlink vg online: %s", vg_path);

	if (unlink(vg_path))
		log_sys_debug("unlink", vg_path);
}
/*
 * Remove the pv online file that was created from device major:minor.
 *
 * When a device goes offline only its device number is known; the device
 * itself can no longer be read to learn its PVID.  So every pv online file
 * is read to find the one(s) recording this major:minor.  Each match is
 * unlinked, and if it names a VG, that VG's online file is removed too.
 * This is why pv online files must record the devno they were created from.
 */
static void _online_pvid_file_remove_devno(int major, int minor)
{
	char pvid_path[PATH_MAX];
	char vgname_in_file[NAME_LEN];
	struct dirent *entry;
	DIR *dirp;
	int major_in_file = 0, minor_in_file = 0;

	log_debug("Remove pv online devno %d:%d", major, minor);

	dirp = opendir(_pvs_online_dir);
	if (!dirp)
		return;

	while ((entry = readdir(dirp))) {
		/* Skips ".", ".." and any other dot file. */
		if (entry->d_name[0] == '.')
			continue;

		memset(pvid_path, 0, sizeof(pvid_path));
		snprintf(pvid_path, sizeof(pvid_path), "%s/%s", _pvs_online_dir, entry->d_name);

		/* Reset per file: a failed read leaves these untouched. */
		major_in_file = 0;
		minor_in_file = 0;
		memset(vgname_in_file, 0, sizeof(vgname_in_file));

		_online_pvid_file_read(pvid_path, &major_in_file, &minor_in_file, vgname_in_file);

		if ((major_in_file != major) || (minor_in_file != minor))
			continue;

		log_debug("Unlink pv online %s", pvid_path);
		if (unlink(pvid_path))
			log_sys_debug("unlink", pvid_path);

		if (vgname_in_file[0])
			_online_vg_file_remove(vgname_in_file);
	}

	if (closedir(dirp))
		log_sys_debug("closedir", _pvs_online_dir);
}
/*
 * Unlink every entry in dirpath whose name does not begin with '.'.
 * A directory that cannot be opened is silently ignored; unlink failures
 * are only logged at debug level.
 */
static void _online_files_remove(const char *dirpath)
{
	char file_path[PATH_MAX];
	struct dirent *entry;
	DIR *dirp;

	dirp = opendir(dirpath);
	if (!dirp)
		return;

	for (entry = readdir(dirp); entry; entry = readdir(dirp)) {
		if (entry->d_name[0] == '.')
			continue;

		memset(file_path, 0, sizeof(file_path));
		snprintf(file_path, sizeof(file_path), "%s/%s", dirpath, entry->d_name);

		if (unlink(file_path))
			log_sys_debug("unlink", file_path);
	}

	if (closedir(dirp))
		log_sys_debug("closedir", dirpath);
}
/*
 * Create the pv online file for dev, named by the device's PVID.
 *
 * The file records "<major>:<minor>\n" and, when vgname is not NULL,
 * "vg:<vgname>\n".  vgname may be NULL when the VG is not known.
 *
 * Returns 1 when the file was created, or when a file already exists for
 * this PVID with the same device number.  Returns 0 on any failure, and
 * when an existing file records a different device number (a possible
 * duplicate PV).
 */
static int _online_pvid_file_create(struct device *dev, const char *vgname)
{
	char path[PATH_MAX];
	char buf[MAX_PVID_FILE_SIZE];
	char file_vgname[NAME_LEN];
	const char *pos;
	int file_major = 0, file_minor = 0;
	int major, minor;
	int fd;
	int rv;
	int len;
	int len1 = 0;
	int len2 = 0;

	memset(buf, 0, sizeof(buf));

	major = (int)MAJOR(dev->dev);
	minor = (int)MINOR(dev->dev);

	if (dm_snprintf(path, sizeof(path), "%s/%s", _pvs_online_dir, dev->pvid) < 0) {
		log_error("Path %s/%s is too long.", _pvs_online_dir, dev->pvid);
		return 0;
	}

	if ((len1 = dm_snprintf(buf, sizeof(buf), "%d:%d\n", major, minor)) < 0) {
		log_error("Cannot create online pv file for %d:%d.", major, minor);
		return 0;
	}

	if (vgname) {
		if ((len2 = dm_snprintf(buf + len1, sizeof(buf) - len1, "vg:%s\n", vgname)) < 0) {
			log_warn("Incomplete online pv file for %d:%d vg %s.", major, minor, vgname);
			/* can still continue without vgname */
			len2 = 0;
		}
	}

	len = len1 + len2;

	log_debug("Create pv online: %s %d:%d %s", path, major, minor, dev_name(dev));

	/* O_EXCL: an existing file for this PVID is handled below. */
	fd = open(path, O_CREAT | O_EXCL | O_RDWR, S_IRUSR | S_IWUSR);
	if (fd < 0) {
		if (errno == EEXIST)
			goto check_duplicate;
		log_error("Failed to open create %s: %d", path, errno);
		return 0;
	}

	/*
	 * write() may transfer fewer bytes than requested, so continue
	 * from where the previous call stopped rather than from the start
	 * of buf.
	 */
	pos = buf;
	while (len > 0) {
		rv = write(fd, pos, len);
		if (rv < 0) {
			log_error("Failed to write fd %d buf %s dev %s to %s: %d",
				  fd, buf, dev_name(dev), path, errno);
			if (close(fd))
				log_sys_debug("close", path);
			return 0;
		}
		pos += rv;
		len -= rv;
	}

	/* We don't care about syncing, these files are not even persistent. */

	if (close(fd))
		log_sys_debug("close", path);

	return 1;

check_duplicate:

	/*
	 * If a PVID online file already exists for this PVID, check if the
	 * file contains a different device number, and if so we may have a
	 * duplicate PV.
	 *
	 * FIXME: disable autoactivation of the VG somehow?
	 * The VG may or may not already be activated when a duplicate appears.
	 * Perhaps write a new field in the pv online or vg online file?
	 */

	memset(file_vgname, 0, sizeof(file_vgname));

	_online_pvid_file_read(path, &file_major, &file_minor, file_vgname);

	if ((file_major == major) && (file_minor == minor)) {
		log_debug("Existing online file for %d:%d", major, minor);
		return 1;
	}

	/* The device numbers differ from here on. */
	log_error("pvscan[%d] PV %s is duplicate for PVID %s on %d:%d and %d:%d.",
		  getpid(), dev_name(dev), dev->pvid, major, minor, file_major, file_minor);

	/*
	 * Don't know how vgname might not match, but it's not good so fail.
	 * vgname is NULL when the caller has no VG, so it must be checked
	 * before being compared or printed.
	 */
	if (file_vgname[0] && vgname && strcmp(file_vgname, vgname))
		log_error("pvscan[%d] PV %s has unexpected VG %s vs %s.",
			  getpid(), dev_name(dev), vgname, file_vgname);

	return 0;
}
/*
 * Report whether a pv online file exists for pvid.
 * Returns 1 if stat() succeeds on the file, 0 otherwise.
 */
static int _online_pvid_file_exists(const char *pvid)
{
	char pvid_path[PATH_MAX];
	struct stat st;

	memset(pvid_path, 0, sizeof(pvid_path));
	snprintf(pvid_path, sizeof(pvid_path), "%s/%s", _pvs_online_dir, pvid);

	log_debug("Check pv online: %s", pvid_path);

	if (stat(pvid_path, &st)) {
		log_debug("Check pv online: no");
		return 0;
	}

	log_debug("Check pv online: yes");
	return 1;
}
/*
 * Make sure the run directory and the pvs_online / vgs_online directories
 * beneath it exist, creating (mode 0755) whichever are missing.  A mkdir()
 * failure is only reported if the directory still cannot be stat()ed
 * afterwards.
 */
static void _online_dir_setup(void)
{
	struct stat st;
	int rv;

	if (stat(DEFAULT_RUN_DIR, &st)) {
		log_debug("Creating run_dir.");
		dm_prepare_selinux_context(DEFAULT_RUN_DIR, S_IFDIR);
		rv = mkdir(DEFAULT_RUN_DIR, 0755);
		dm_prepare_selinux_context(NULL, 0);

		if ((rv < 0) && stat(DEFAULT_RUN_DIR, &st))
			log_error("Failed to create %s %d", DEFAULT_RUN_DIR, errno);
	}

	if (stat(_pvs_online_dir, &st)) {
		log_debug("Creating pvs_online_dir.");
		dm_prepare_selinux_context(_pvs_online_dir, S_IFDIR);
		rv = mkdir(_pvs_online_dir, 0755);
		dm_prepare_selinux_context(NULL, 0);

		if ((rv < 0) && stat(_pvs_online_dir, &st))
			log_error("Failed to create %s %d", _pvs_online_dir, errno);
	}

	if (stat(_vgs_online_dir, &st)) {
		log_debug("Creating vgs_online_dir.");
		dm_prepare_selinux_context(_vgs_online_dir, S_IFDIR);
		rv = mkdir(_vgs_online_dir, 0755);
		dm_prepare_selinux_context(NULL, 0);

		if ((rv < 0) && stat(_vgs_online_dir, &st))
			log_error("Failed to create %s %d", _vgs_online_dir, errno);
	}
}
/*
 * Record that the PV on dev is online and, when autoactivation is wanted,
 * work out whether its arrival completes the VG.
 *
 * vg:            metadata read from the PV, or NULL if none was found.
 * dev_args:      devices named on the command line, or NULL.
 * found_vgnames: list receiving names of complete VGs, or NULL when the
 *                caller does not track complete VGs.
 *
 * Returns 0 only when the pv online file cannot be created, else 1.
 */
static int _online_pv_found(struct cmd_context *cmd,
			    struct device *dev, struct dm_list *dev_args,
			    struct volume_group *vg,
			    struct dm_list *found_vgnames)
{
	struct pv_list *pvl;
	const char *vgname = vg ? vg->name : NULL;
	int missing_pvs = 0;
	int arg_dev_in_vg = 0;

	/* The file named for the PVID is what marks this PV as online. */
	if (!_online_pvid_file_create(dev, vgname))
		return_0;

	if (!vg || !found_vgnames) {
		log_print("pvscan[%d] PV %s online.", getpid(), dev_name(dev));
		return 1;
	}

	/*
	 * Completeness only matters for autoactivation, which should run
	 * only once every PV of the VG is online.  Count the PVs of the VG
	 * that have no online file yet, and note whether any PV of the VG
	 * is one of the devices given on the command line.
	 */
	dm_list_iterate_items(pvl, &vg->pvs) {
		if (!_online_pvid_file_exists((const char *)&pvl->pv->id.uuid))
			missing_pvs++;

		if (dev_args && dev_in_device_list(pvl->pv->dev, dev_args))
			arg_dev_in_vg = 1;
	}

	/* A PVID without an online file means the VG is not complete yet. */
	if (missing_pvs) {
		log_print("pvscan[%d] PV %s online, VG %s incomplete (need %d).",
			  getpid(), dev_name(dev), vg->name, missing_pvs);
		return 1;
	}

	log_print("pvscan[%d] PV %s online, VG %s is complete.", getpid(), dev_name(dev), vg->name);

	log_debug("online dev %s completes VG %s.", dev_name(dev), vg->name);

	/*
	 * With no device args every complete VG is reported; with device
	 * args only complete VGs containing one of those devices are.
	 */
	if (dev_args && !arg_dev_in_vg)
		return 1;

	log_debug("online dev %s can autoactivate VG %s", dev_name(dev), vg->name);

	if (!str_list_add(cmd->mem, found_vgnames, dm_pool_strdup(cmd->mem, vg->name)))
		stack;

	return 1;
}
/* Context handed to _online_pvscan_single() for each metadata area of a PV. */
struct _pvscan_baton {
/* Command context of the running pvscan. */
struct cmd_context *cmd;
/* VG with the highest seqno read so far from the PV's mdas; NULL if none. */
struct volume_group *vg;
/* Format instance passed to the mda's vg_read operation. */
struct format_instance *fid;
};
/*
 * lvmcache_foreach_mda callback: read the VG metadata from one metadata
 * area and keep, in the baton, the copy with the highest seqno.
 *
 * If no mdas on this PV have a usable copy of the metadata,
 * then the PV behaves like a PV without metadata, which
 * causes the pvscan to scan all devs to find a copy of the
 * metadata on another dev to check if the VG is now complete
 * and can be activated.
 *
 * Always returns 1 so that the remaining mdas are still visited.
 */
static int _online_pvscan_single(struct metadata_area *mda, void *baton)
{
	struct _pvscan_baton *b = baton;
	struct volume_group *vg;
	struct device *mda_dev = mda_get_device(mda);

	if (mda_is_ignored(mda))
		return 1;

	vg = mda->ops->vg_read(b->fid, "", mda, NULL, NULL);
	if (!vg) {
		/*
		 * Many or most cases of bad metadata would be found in
		 * the scan, and the mda removed from the list, so we
		 * would not get here to attempt this.
		 */
		log_print("pvscan[%d] metadata error in mda %d on %s.",
			  getpid(), mda->mda_num, dev_name(mda_dev));
		return 1;
	}

	log_debug("pvscan vg_read %s seqno %u in mda %d on %s",
		  vg->name, vg->seqno, mda->mda_num, dev_name(mda_dev));

	if (!b->vg || vg->seqno > b->vg->seqno) {
		/*
		 * The newer copy replaces the one kept so far; release the
		 * older copy, otherwise nothing references it any more.
		 */
		if (b->vg)
			release_vg(b->vg);
		b->vg = vg;
	} else
		release_vg(vg);

	return 1;
}
/*
 * Look up a VG by name in a list of struct vg_list entries.
 * Returns the first VG whose name equals vgname, or NULL if none does.
 */
static struct volume_group *_find_saved_vg(struct dm_list *saved_vgs, const char *vgname)
{
	struct vg_list *entry;
	struct volume_group *match = NULL;

	dm_list_iterate_items(entry, saved_vgs) {
		if (strcmp(vgname, entry->vg->name) == 0) {
			match = entry->vg;
			break;
		}
	}

	return match;
}
/*
 * Process one device for pvscan --cache: read the VG metadata from the
 * PV on dev (if any), record the PV as online, and report whether that
 * completes its VG.
 *
 * disable_remove is 1 when resetting the online state, which begins with
 * removing all pvid files, and then creating new pvid files for PVs that
 * are found, so we don't need to try to remove pvid files here when a PV
 * is not found on a device.
 *
 * dev_args and found_vgnames are passed through to _online_pv_found().
 * saved_vgs, when set, receives the VG read from this PV unless a VG of
 * the same name is already on the list.
 * pvid_without_metadata, when set, is pointed at a pool copy of the PVID
 * if the PV has no usable VG metadata.
 *
 * Returns 0 when the format instance cannot be created or the PV cannot
 * be recorded as online; 1 otherwise, including all "ignore" cases.
 */
static int _online_pvscan_one(struct cmd_context *cmd, struct device *dev,
			      struct dm_list *dev_args,
			      struct dm_list *found_vgnames,
			      struct dm_list *saved_vgs,
			      int disable_remove,
			      const char **pvid_without_metadata)
{
	struct lvmcache_info *info;
	struct vg_list *vgl;
	struct _pvscan_baton baton;
	const struct format_type *fmt;
	/* Create a dummy instance. */
	struct format_instance_ctx fic = { .type = 0 };
	uint32_t ext_version;
	uint32_t ext_flags;
	int ret = 0;

	log_debug("pvscan metadata from dev %s", dev_name(dev));

	if (udev_dev_is_mpath_component(dev)) {
		log_print("pvscan[%d] ignore multipath component %s.", getpid(), dev_name(dev));
		return 1;
	}

	if (!(info = lvmcache_info_from_pvid(dev->pvid, dev, 0))) {
		log_debug("No PV info found on %s for PVID %s.", dev_name(dev), dev->pvid);
		if (!disable_remove)
			_online_pvid_file_remove_devno((int)MAJOR(dev->dev), (int)MINOR(dev->dev));
		return 1;
	}

	if (!lvmcache_get_label(info)) {
		log_debug("No PV label found for %s.", dev_name(dev));
		if (!disable_remove)
			_online_pvid_file_remove_devno((int)MAJOR(dev->dev), (int)MINOR(dev->dev));
		return 1;
	}

	ext_version = lvmcache_ext_version(info);
	ext_flags = lvmcache_ext_flags(info);

	if ((ext_version >= 2) && !(ext_flags & PV_EXT_USED)) {
		log_print("pvscan[%d] PV %s not used.", getpid(), dev_name(dev));
		return 1;
	}

	fmt = lvmcache_fmt(info);

	memset(&baton, 0, sizeof(baton));
	baton.cmd = cmd;
	baton.vg = NULL;
	baton.fid = fmt->ops->create_instance(fmt, &fic);

	if (!baton.fid) {
		ret = 0;
		goto_out;
	}

	/* Leaves the newest VG metadata found on this PV in baton.vg. */
	lvmcache_foreach_mda(info, _online_pvscan_single, &baton);

	if (!baton.vg) {
		log_print("pvscan[%d] PV %s has no VG metadata.", getpid(), dev_name(dev));
		if (pvid_without_metadata)
			*pvid_without_metadata = dm_pool_strdup(cmd->mem, dev->pvid);
		/* No VG uses the instance, so it is destroyed here. */
		fmt->ops->destroy_instance(baton.fid);
	} else {
		set_pv_devices(baton.fid, baton.vg, NULL);
	}

	/* This check repeated because set_pv_devices can do new md check. */
	if (dev->flags & DEV_IS_MD_COMPONENT) {
		log_print("pvscan[%d] PV %s ignore MD component, ignore metadata.", getpid(), dev_name(dev));
		/*
		 * Without a VG the instance was already destroyed above, so
		 * it must not be destroyed a second time here.
		 */
		if (baton.vg)
			release_vg(baton.vg);
		return 1;
	}

	if (baton.vg && vg_is_shared(baton.vg)) {
		log_print("pvscan[%d] PV %s ignore shared VG.", getpid(), dev_name(dev));
		release_vg(baton.vg);
		return 1;
	}

	if (baton.vg &&
	    baton.vg->system_id && baton.vg->system_id[0] &&
	    cmd->system_id && cmd->system_id[0] &&
	    vg_is_foreign(baton.vg)) {
		log_verbose("Ignore PV %s with VG system id %s with our system id %s",
			    dev_name(dev), baton.vg->system_id, cmd->system_id);
		log_print("pvscan[%d] PV %s ignore foreign VG.", getpid(), dev_name(dev));
		release_vg(baton.vg);
		return 1;
	}

	ret = _online_pv_found(cmd, dev, dev_args, baton.vg, found_vgnames);

	/*
	 * Save vg's in case they need to be used at the end for checking PVs
	 * without metadata (in _check_vg_with_pvid_complete), or activating.
	 * Once saved, the list owns the vg, so baton.vg is cleared to avoid
	 * releasing it below.
	 */
	if (saved_vgs && baton.vg) {
		if (!_find_saved_vg(saved_vgs, baton.vg->name)) {
			if ((vgl = malloc(sizeof(struct vg_list)))) {
				vgl->vg = baton.vg;
				baton.vg = NULL;
				dm_list_add(saved_vgs, &vgl->list);
			}
		}
	}

	if (baton.vg)
		release_vg(baton.vg);
out:
	return ret;
}
/*
* This is to handle the case where pvscan --cache -aay (with no dev args)
* gets to the final PV, completing the VG, but that final PV does not
* have VG metadata. In this case, we need to use VG metadata from a
* previously scanned PV in the same VG, which we saved in the saved_vgs
* list. Using this saved metadata, we can find which VG this PVID
* belongs to, and then check if that VG is now complete, and if so
* add the VG name to the list of complete VGs to be autoactivated.
*
* The "pvid" arg here is the PVID of the PV that has just been scanned
* and didn't have metadata. We look through previously scanned VG
* metadata to find the VG this PVID belongs to, and then check that VG
* metadata to see if all the PVs are now online.
*/
static void _check_vg_with_pvid_complete(struct cmd_context *cmd,
struct dm_list *found_vgnames,
struct dm_list *saved_vgs,
const char *pvid)
{
struct vg_list *vgl;
struct pv_list *pvl;
struct volume_group *vg;
int pvids_not_online = 0;
int found;
dm_list_iterate_items(vgl, saved_vgs) {
vg = vgl->vg;
found = 0;
dm_list_iterate_items(pvl, &vg->pvs) {
if (strcmp((const char *)&pvl->pv->id.uuid, pvid))
continue;
found = 1;
break;
}
if (!found)
continue;
dm_list_iterate_items(pvl, &vg->pvs) {
if (!_online_pvid_file_exists((const char *)&pvl->pv->id.uuid)) {
pvids_not_online++;
break;
}
}
if (!pvids_not_online) {
log_debug("pvid %s makes complete VG %s", pvid, vg->name);
if (!str_list_add(cmd->mem, found_vgnames, dm_pool_strdup(cmd->mem, vg->name)))
stack;
} else
log_debug("pvid %s incomplete VG %s", pvid, vg->name);
break;
}
}
/*
 * Label-scan all devices and run _online_pvscan_one() on each device
 * returned by the filtered device iterator.
 *
 * dev_args is the list of devices that were specified on the pvscan
 * command line.
 *
 * . When dev_args is NULL, any complete VGs that are found will be
 *   returned in found_vgnames.
 *
 * . When dev_args is set, then complete VGs that contain devs in
 *   dev_args will be returned in found_vgnames.
 *
 * found_vgnames is null for 'pvscan --cache' (without -aay) since the
 * command does not need to keep track of complete vgs because it does
 * not activate them.
 */
static void _online_pvscan_all_devs(struct cmd_context *cmd,
				    struct dm_list *found_vgnames,
				    struct dm_list *saved_vgs,
				    struct dm_list *dev_args)
{
	struct dev_iter *iter;
	struct device *dev;
	const char *no_md_pvid;

	lvmcache_label_scan(cmd);

	iter = dev_iter_create(cmd->filter, 1);
	if (!iter) {
		log_error("dev_iter creation failed");
		return;
	}

	for (dev = dev_iter_get(cmd, iter); dev; dev = dev_iter_get(cmd, iter)) {
		if (sigint_caught()) {
			stack;
			break;
		}

		no_md_pvid = NULL;

		/* Online files are not removed here (disable_remove is 1). */
		if (!_online_pvscan_one(cmd, dev, dev_args, found_vgnames, saved_vgs, 1, &no_md_pvid)) {
			stack;
			break;
		}

		/* This PV without metadata may complete a VG. */
		if (no_md_pvid && found_vgnames)
			_check_vg_with_pvid_complete(cmd, found_vgnames, saved_vgs, no_md_pvid);
	}

	dev_iter_destroy(iter);
}
/*
 * process_each_vg callback that autoactivates one VG.
 * Clustered, exported and shared VGs are skipped.  An activation failure
 * is logged and counted in the pvscan_aa_params held in
 * handle->custom_handle.  Always returns ECMD_PROCESSED.
 */
static int _pvscan_aa_single(struct cmd_context *cmd, const char *vg_name,
			     struct volume_group *vg, struct processing_handle *handle)
{
	struct pvscan_aa_params *aa = (struct pvscan_aa_params *)handle->custom_handle;

	if (vg_is_clustered(vg) || vg_is_exported(vg) || vg_is_shared(vg))
		return ECMD_PROCESSED;

	log_debug("pvscan autoactivating VG %s.", vg_name);

	if (vgchange_activate(cmd, vg, CHANGE_AAY))
		return ECMD_PROCESSED;

	log_error("%s: autoactivation failed.", vg->name);
	aa->activate_errors++;

	return ECMD_PROCESSED;
}
/*
 * Create the vg online file for vgname.  The file is opened with O_EXCL,
 * so this fails if the file already exists.
 * Returns 1 if this call created the file, 0 otherwise.
 */
static int _online_vg_file_create(struct cmd_context *cmd, const char *vgname)
{
	char vg_path[PATH_MAX];
	int n;
	int fd;

	n = dm_snprintf(vg_path, sizeof(vg_path), "%s/%s", _vgs_online_dir, vgname);
	if (n < 0) {
		log_error("Path %s/%s is too long.", _vgs_online_dir, vgname);
		return 0;
	}

	log_debug("Create vg online: %s", vg_path);

	fd = open(vg_path, O_CREAT | O_EXCL | O_TRUNC | O_RDWR, S_IRUSR | S_IWUSR);
	if (fd < 0) {
		log_debug("Failed to create %s: %d", vg_path, errno);
		return 0;
	}

	/* We don't care about syncing, these files are not even persistent. */

	if (close(fd))
		log_sys_debug("close", vg_path);

	return 1;
}
/*
 * Build the list of devices (struct device_list entries added to devs)
 * that hold the PVs of VG vgname, without scanning devices first.
 *
 * This is a very unconventional way of doing things because we want to
 * figure out which devices to read the VG from without first scanning
 * devices.  It's usually the reverse; we have to scan all devs, which
 * tells us which devs we need to read to get the VG.
 *
 * We can do it this way only by cheating and using the pvid online files
 * for devs that have been scanned by prior pvscan instances.
 *
 * This is similar to the hints file, but the hints file is always a full
 * picture of PV state, and is only ever created by scanning all devs,
 * whereas the online files are only created incrementally by scanning one
 * device at a time.  The online files are only used for determining
 * complete VGs for the purpose of autoactivation, and no attempt is made
 * to keep them in sync with lvm state once autoactivation is complete,
 * but much effort is made to always ensure hints will accurately reflect
 * PV state.
 *
 * The saved VG metadata tells us which PVIDs are needed to complete the
 * VG.  The pv online files tell us which of those PVIDs are online, and
 * the content of those pv online files tells us which major:minor number
 * holds that PVID.  The dev_cache tells us which struct device has the
 * major:minor.  We end up with a list of struct devices that we need to
 * scan/read in order to process/activate the VG.
 *
 * Returns 1 on success; 0 if the VG was not saved, an online file names a
 * different VG, a device cannot be found, or allocation fails.  Entries
 * already added to devs are left on the list when 0 is returned.
 */
static int _get_devs_from_saved_vg(struct cmd_context *cmd, char *vgname,
				   struct dm_list *saved_vgs,
				   struct dm_list *devs)
{
	char online_path[PATH_MAX];
	char vgname_in_file[NAME_LEN];
	struct volume_group *saved;
	struct pv_list *pvl;
	struct device_list *entry;
	struct device *pv_dev;
	const char *pvid;
	dev_t devt;
	int major_in_file = 0, minor_in_file = 0;

	/*
	 * The metadata (as a struct vg) from the scanned device arg was
	 * saved earlier; it lists the PVIDs of the VG.  (An alternative
	 * would be to read all the pvid online files, pick those with a
	 * matching vg name, and take the device numbers from them.)
	 */
	saved = _find_saved_vg(saved_vgs, vgname);
	if (!saved)
		return_0;

	dm_list_iterate_items(pvl, &saved->pvs) {
		pvid = (const char *)&pvl->pv->id.uuid;

		memset(online_path, 0, sizeof(online_path));
		snprintf(online_path, sizeof(online_path), "%s/%s", _pvs_online_dir, pvid);

		/* Reset per PV: a failed read leaves these untouched. */
		major_in_file = 0;
		minor_in_file = 0;
		memset(vgname_in_file, 0, sizeof(vgname_in_file));

		_online_pvid_file_read(online_path, &major_in_file, &minor_in_file, vgname_in_file);

		if (vgname_in_file[0] && strcmp(vgname, vgname_in_file)) {
			log_error("Wrong VG found for %d:%d PVID %s: %s vs %s",
				  major_in_file, minor_in_file, pvid, vgname, vgname_in_file);
			return 0;
		}

		devt = MKDEV(major_in_file, minor_in_file);

		pv_dev = dev_cache_get_by_devt(cmd, devt, NULL, NULL);
		if (!pv_dev) {
			log_error("No device found for %d:%d PVID %s", major_in_file, minor_in_file, pvid);
			return 0;
		}

		entry = zalloc(sizeof(*entry));
		if (!entry)
			return_0;

		entry->dev = pv_dev;
		dm_list_add(devs, &entry->list);

		log_debug("pvscan using %s for PVID %s in VG %s", dev_name(pv_dev), pvid, vgname);
	}

	return 1;
}
/*
 * Autoactivate a single VG without going through process_each_vg().
 *
 * When there's a single VG to activate (the common case),
 * optimize things by cutting out the process_each_vg().
 *
 * The main point of this optimization is to avoid extra
 * device scanning in the common case where we're
 * activating a completed VG after scanning a single PV.
 * The scanning overhead of hundreds of concurrent
 * activations from hundreds of PVs appearing together
 * can be overwhelming, and scanning needs to be reduced
 * as much as possible.
 *
 * The common process_each_vg will generally do:
 * label scan all devs
 * vg_read
 *   lock vg
 *   label rescan of only vg devs (often skipped)
 *   read metadata
 *   set pv devices (find devs for each PVID)
 * do command (vgchange_activate)
 * unlock vg
 *
 * In this optimized version we do:
 * lock vg
 * label scan of only vg devs
 * vg_read
 *   read metadata
 *   set pv devices (find devs for each PVID)
 * do command (vgchange_activate)
 * unlock vg
 *
 * The optimized version avoids scanning all devs, which
 * is important when there are many devs.
 *
 * Returns ECMD_FAILED when the devices cannot be found, the VG cannot be
 * locked or read, or the VG uses devices other than those scanned;
 * otherwise ECMD_PROCESSED.  A failed activation is counted in
 * pp->activate_errors but does not change the return value.
 */
static int _pvscan_aa_direct(struct cmd_context *cmd, struct pvscan_aa_params *pp, char *vgname,
			     struct dm_list *saved_vgs)
{
	struct dm_list devs; /* device_list */
	struct device_list *devl, *devl2;
	struct volume_group *vg = NULL;
	struct pv_list *pvl;
	const char *vgid;
	uint32_t lockd_state = 0;
	uint32_t error_flags = 0;
	int ret = ECMD_PROCESSED;

	dm_list_init(&devs);

	/*
	 * Get list of devices for this VG so we can label scan them.
	 * The saved VG struct gives the list of PVIDs in the VG.
	 * The pvs_online/PVID files gives us the devnums for PVIDs.
	 * The dev_cache gives us struct devices from the devnums.
	 */
	if (!_get_devs_from_saved_vg(cmd, vgname, saved_vgs, &devs)) {
		log_error("pvscan activation for VG %s failed to find devices.", vgname);
		ret = ECMD_FAILED;
		goto out_devs;
	}

	/*
	 * Lock the VG before scanning so we don't need to
	 * rescan in _vg_read.  (The lock_vol and the
	 * label rescan are then disabled in vg_read.)
	 */
	if (!lock_vol(cmd, vgname, LCK_VG_WRITE, NULL)) {
		log_error("pvscan activation for VG %s failed to lock VG.", vgname);
		ret = ECMD_FAILED;
		goto out_devs;
	}

	/*
	 * Drop lvmcache info about the PV/VG that was saved
	 * when originally identifying the PV.
	 */
	lvmcache_destroy(cmd, 1, 1);

	label_scan_devs(cmd, NULL, &devs);

	/* From here on the VG lock is held and must be dropped on every exit. */
	if (!(vgid = lvmcache_vgid_from_vgname(cmd, vgname))) {
		log_error("pvscan activation for VG %s failed to find vgid.", vgname);
		ret = ECMD_FAILED;
		goto out;
	}

	/*
	 * can_use_one_scan and READ_WITHOUT_LOCK are both important key
	 * changes made to vg_read that are possible because the VG is locked
	 * above (lock_vol).
	 */

	cmd->can_use_one_scan = 1;

	vg = vg_read(cmd, vgname, vgid, READ_WITHOUT_LOCK | READ_FOR_ACTIVATE, lockd_state, &error_flags, NULL);

	if (!vg) {
		/*
		 * The common cases would already have been caught during the
		 * original device arg scan.  There will be very few and unusual
		 * cases that would be caught here.
		 */
		log_error("pvscan activation for VG %s cannot read (%x).", vgname, error_flags);
		ret = ECMD_FAILED;
		goto out;
	}

	/*
	 * These cases would already have been caught during the original
	 * device arg scan.
	 */
	if (vg_is_clustered(vg))
		goto_out;
	if (vg_is_exported(vg))
		goto_out;
	if (vg_is_shared(vg))
		goto_out;

	/*
	 * Verify that the devices we scanned above for the VG are in fact the
	 * devices used by the VG we read.
	 */
	dm_list_iterate_items(pvl, &vg->pvs) {
		if (dev_in_device_list(pvl->pv->dev, &devs))
			continue;
		log_error("pvscan activation for VG %s found different devices.", vgname);
		ret = ECMD_FAILED;
		goto out;
	}

	log_debug("pvscan autoactivating VG %s.", vgname);

	if (!vgchange_activate(cmd, vg, CHANGE_AAY)) {
		log_error("%s: autoactivation failed.", vg->name);
		pp->activate_errors++;
	}

out:
	/*
	 * vg is NULL when the failure happened before or in vg_read.
	 * NOTE(review): relies on unlock_vg() accepting a NULL vg - confirm.
	 */
	unlock_vg(cmd, vg, vgname);
	if (vg)
		release_vg(vg);

out_devs:
	/* The list entries were heap-allocated for this call only. */
	dm_list_iterate_items_safe(devl, devl2, &devs) {
		dm_list_del(&devl->list);
		free(devl);
	}

	return ret;
}
/*
 * Autoactivate the complete VGs named in vgnames.
 *
 * Names for which this pvscan fails to create the vg online file are
 * removed from vgnames first.  If exactly one VG is left it is activated
 * through _pvscan_aa_direct(); several VGs go through process_each_vg().
 *
 * Returns ECMD_PROCESSED when there is nothing (left) to activate,
 * ECMD_FAILED if the processing handle cannot be set up, otherwise the
 * result of the activation path used.
 */
static int _pvscan_aa(struct cmd_context *cmd, struct pvscan_aa_params *pp,
		      struct dm_list *vgnames, struct dm_list *saved_vgs)
{
	struct processing_handle *handle = NULL;
	struct dm_str_list *name, *next_name;
	int ret;

	if (dm_list_empty(vgnames)) {
		log_debug("No VGs to autoactivate.");
		return ECMD_PROCESSED;
	}

	handle = init_processing_handle(cmd, NULL);
	if (!handle) {
		log_error("Failed to initialize processing handle.");
		return ECMD_FAILED;
	}

	handle->custom_handle = pp;

	/*
	 * Several pvscans may run concurrently for the same completed VG
	 * (when all of its PVs appear at once), but only one of them should
	 * run the activation: the first to create the vg online file wins,
	 * and the others drop the VG from their list.
	 */
	dm_list_iterate_items_safe(name, next_name, vgnames) {
		if (_online_vg_file_create(cmd, name->str)) {
			log_print("pvscan[%d] VG %s run autoactivation.", getpid(), name->str);
			continue;
		}

		log_print("pvscan[%d] VG %s skip autoactivation.", getpid(), name->str);
		str_list_del(vgnames, name->str);
	}

	if (dm_list_empty(vgnames)) {
		ret = ECMD_PROCESSED;
	} else if (dm_list_size(vgnames) == 1) {
		/* Runs once: the list holds a single name. */
		dm_list_iterate_items(name, vgnames)
			ret = _pvscan_aa_direct(cmd, pp, (char *)name->str, saved_vgs);
	} else {
		/* FIXME: suppress label scan in process_each if label scan already done? */
		ret = process_each_vg(cmd, 0, NULL, NULL, vgnames, READ_FOR_ACTIVATE, 0, handle, _pvscan_aa_single);
	}

	destroy_processing_handle(cmd, handle);

	return ret;
}
int pvscan_cache_cmd(struct cmd_context *cmd, int argc, char **argv)
{
struct pvscan_aa_params pp = { 0 };
struct dm_list add_devs;
struct dm_list rem_devs;
struct dm_list vgnames;
struct dm_list vglist;
struct dm_list *complete_vgnames = NULL;
struct dm_list *saved_vgs = NULL;
struct device *dev;
struct device_list *devl;
struct vg_list *vgl;
const char *pv_name;
const char *pvid_without_metadata = NULL;
int32_t major = -1;
int32_t minor = -1;
int devno_args = 0;
int all_devs;
struct arg_value_group_list *current_group;
dev_t devno;
int filtered;
int do_activate = arg_is_set(cmd, activate_ARG);
int add_errors = 0;
int add_single_count = 0;
int ret = ECMD_PROCESSED;
dm_list_init(&add_devs);
dm_list_init(&rem_devs);
dm_list_init(&vgnames);
dm_list_init(&vglist);
/*
* When systemd/udev run pvscan --cache commands, those commands
* should not wait on udev info since the udev info may not be
* complete until the pvscan --cache command is done.
*/
init_udev_sleeping(0);
if (do_activate) {
complete_vgnames = &vgnames;
saved_vgs = &vglist;
}
if (arg_is_set(cmd, major_ARG) + arg_is_set(cmd, minor_ARG))
devno_args = 1;
if (devno_args && (!arg_is_set(cmd, major_ARG) || !arg_is_set(cmd, minor_ARG))) {
log_error("Both --major and --minor required to identify devices.");
return EINVALID_CMD_LINE;
}
if (argc || devno_args) {
log_verbose("pvscan devices on command line.");
cmd->pvscan_cache_single = 1;
all_devs = 0;
} else {
all_devs = 1;
}
_online_dir_setup();
/* Creates a list of dev names from /dev, sysfs, etc; does not read any. */
dev_cache_scan();
if (cmd->md_component_detection && !cmd->use_full_md_check &&
!strcmp(cmd->md_component_checks, "auto") &&
dev_cache_has_md_with_end_superblock(cmd->dev_types)) {
log_debug("Enable full md component check.");
cmd->use_full_md_check = 1;
}
/*
* For each device command arg (from either position or --major/--minor),
* decide if that device is being added to the system (a dev node exists
* for it in /dev), or being removed from the system (no dev node exists
* for it in /dev). Create entries in add_devs/rem_devs for each arg
* accordingly.
*/
while (argc) {
argc--;
pv_name = *argv++;
if (pv_name[0] == '/') {
if (!(dev = dev_cache_get(cmd, pv_name, cmd->filter))) {
log_debug("pvscan arg %s not found.", pv_name);
if ((dev = dev_cache_get(cmd, pv_name, NULL))) {
/* nothing to do for this dev name */
log_print("pvscan[%d] device %s excluded by filter.", getpid(), dev_name(dev));
} else {
log_error("Physical Volume %s not found.", pv_name);
ret = ECMD_FAILED;
}
} else {
/*
* Scan device. This dev could still be removed
* below if it doesn't pass other filters.
*/
log_debug("pvscan arg %s found.", pv_name);
if (!(devl = dm_pool_zalloc(cmd->mem, sizeof(*devl))))
return_0;
devl->dev = dev;
dm_list_add(&add_devs, &devl->list);
}
} else {
if (sscanf(pv_name, "%d:%d", &major, &minor) != 2) {
log_warn("WARNING: Failed to parse major:minor from %s, skipping.", pv_name);
continue;
}
devno = MKDEV(major, minor);
if (!(dev = dev_cache_get_by_devt(cmd, devno, cmd->filter, &filtered))) {
if (filtered) {
if ((dev = dev_cache_get_by_devt(cmd, devno, NULL, NULL)))
log_print("pvscan[%d] device %d:%d %s excluded by filter.", getpid(), major, minor, dev_name(dev));
else
log_print("pvscan[%d] device %d:%d excluded by filter.", getpid(), major, minor);
} else
log_print("pvscan[%d] device %d:%d not found.", getpid(), major, minor);
if (!(dev = dm_pool_zalloc(cmd->mem, sizeof(struct device))))
return_0;
if (!(devl = dm_pool_zalloc(cmd->mem, sizeof(*devl))))
return_0;
dev->dev = devno;
devl->dev = dev;
dm_list_add(&rem_devs, &devl->list);
} else {
/*
* Scan device. This dev could still be removed
* below if it doesn't pass other filters.
*/
log_debug("pvscan arg %d:%d found.", major, minor);
if (!(devl = dm_pool_zalloc(cmd->mem, sizeof(*devl))))
return_0;
devl->dev = dev;
dm_list_add(&add_devs, &devl->list);
}
}
}
/* Process any grouped --major --minor args */
dm_list_iterate_items(current_group, &cmd->arg_value_groups) {
major = grouped_arg_int_value(current_group->arg_values, major_ARG, major);
minor = grouped_arg_int_value(current_group->arg_values, minor_ARG, minor);
if (major < 0 || minor < 0)
continue;
devno = MKDEV(major, minor);
if (!(dev = dev_cache_get_by_devt(cmd, devno, cmd->filter, &filtered))) {
if (filtered) {
if ((dev = dev_cache_get_by_devt(cmd, devno, NULL, NULL)))
log_print("pvscan[%d] device %d:%d %s excluded by filter.", getpid(), major, minor, dev_name(dev));
else
log_print("pvscan[%d] device %d:%d excluded by filter.", getpid(), major, minor);
} else
log_print("pvscan[%d] device %d:%d not found.", getpid(), major, minor);
if (!(dev = dm_pool_zalloc(cmd->mem, sizeof(struct device))))
return_0;
if (!(devl = dm_pool_zalloc(cmd->mem, sizeof(*devl))))
return_0;
dev->dev = devno;
devl->dev = dev;
dm_list_add(&rem_devs, &devl->list);
} else {
log_debug("pvscan arg %d:%d found.", major, minor);
if (!(devl = dm_pool_zalloc(cmd->mem, sizeof(*devl))))
return_0;
devl->dev = dev;
dm_list_add(&add_devs, &devl->list);
}
}
/*
* No device args means rescan/regenerate/[reactivate] everything.
* Scan all devices when no args are given; clear all pvid
* files on recreate pvid files for existing devices.
* When -aay is set, any complete vg is activated
* (even if it's already active.)
*/
if (all_devs) {
/*
* pvscan --cache removes existing hints and recreates new ones.
* We begin by clearing hints at the start of the command.
* The pvscan_recreate_hints flag is used to enable the
* special case hint recreation in label_scan.
*/
cmd->pvscan_recreate_hints = 1;
pvscan_recreate_hints_begin(cmd);
log_verbose("pvscan all devices for requested refresh.");
_online_files_remove(_pvs_online_dir);
_online_files_remove(_vgs_online_dir);
_online_pvscan_all_devs(cmd, complete_vgnames, saved_vgs, NULL);
cmd->pvscan_recreate_hints = 0;
cmd->use_hints = 0;
goto activate;
}
log_verbose("pvscan only specific devices add %d rem %d.",
dm_list_size(&add_devs), dm_list_size(&rem_devs));
/*
* Unlink online files for devices that no longer have a device node.
* When unlinking a pvid file for dev, we don't need to scan the dev
* (we can't since it's gone), but we know which pvid file it is
* because the major:minor are saved in the pvid files which we can
* read to find the correct one.
*/
dm_list_iterate_items(devl, &rem_devs)
_online_pvid_file_remove_devno((int)MAJOR(devl->dev->dev), (int)MINOR(devl->dev->dev));
/*
* Create online files for devices that exist and pass the filter.
* When creating a pvid file for a dev, we have to scan it first
* to know that it's ours and what its pvid is (and which vg it
* belongs to if we want to do autoactivation.)
*/
if (!dm_list_empty(&add_devs)) {
label_scan_devs(cmd, cmd->filter, &add_devs);
dm_list_iterate_items(devl, &add_devs) {
dev = devl->dev;
if (dev->flags & DEV_FILTER_OUT_SCAN) {
log_print("pvscan[%d] device %s excluded by filter.", getpid(), dev_name(dev));
continue;
}
add_single_count++;
if (!_online_pvscan_one(cmd, dev, NULL, complete_vgnames, saved_vgs, 0, &pvid_without_metadata))
add_errors++;
}
}
/*
* After scanning only specific devs to add a device, there is a
* special case that requires us to then scan all devs. That is when
* the dev scanned has no VG metadata, and it's the final device to
* complete the VG. In this case we want to autoactivate the VG, but
* the scanned device does not know what VG it's in or whether that VG
* is now complete. In this case we need to scan all devs and pick out
* the complete VG holding this device so we can then autoactivate that
* VG.
*/
if (!dm_list_empty(&add_devs) && complete_vgnames && dm_list_empty(complete_vgnames) &&
pvid_without_metadata && do_activate) {
log_print("pvscan[%d] scan all devices for PV without metadata: %s.", getpid(), pvid_without_metadata);
_online_pvscan_all_devs(cmd, complete_vgnames, saved_vgs, &add_devs);
}
/*
* When a new PV appears, the system runs pvscan --cache dev.
* This also means that existing hints are invalid, and
* we can force hints to be refreshed here. There may be
* cases where this detects a change that the other methods
* of detecting invalid hints doesn't catch.
*/
if (add_single_count)
invalidate_hints(cmd);
activate:
/*
* Step 2: when the PV was recorded online, we check if all the
* PVs for the VG are online. If so, the vgname was added to the
* list, and we can attempt to autoactivate LVs in the VG.
*/
if (do_activate)
ret = _pvscan_aa(cmd, &pp, complete_vgnames, saved_vgs);
if (add_errors || pp.activate_errors)
ret = ECMD_FAILED;
dm_list_iterate_items(vgl, &vglist)
release_vg(vgl->vg);
if (!sync_local_dev_names(cmd))
stack;
return ret;
}
/*
 * Generic pvscan entry point.  It does no work itself: it logs an
 * internal error naming the command definition (index and id) and
 * returns ECMD_FAILED.  argc and argv are not used.
 */
int pvscan(struct cmd_context *cmd, int argc, char **argv)
{
	(void) argc;
	(void) argv;

	log_error(INTERNAL_ERROR "Missing function for command definition %d:%s.",
		  cmd->command->command_index,
		  cmd->command->command_id);

	return ECMD_FAILED;
}