From db98a6e3629aaf6bf42d0e840273b4328f930c89 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 21 May 2019 12:06:34 -0500 Subject: [PATCH 01/22] Additional MD component checking If udev info is missing for a device, (which would indicate if it's an MD component), then do an end-of-device read to check if a PV is an MD component. (This is skipped when using hints since we already know devs in hints are good.) A new config setting md_component_checks can be used to disable the additional end-of-device MD checks, or to always enable end-of-device MD checks. When both hints and udev info are disabled/unavailable, the end of PVs will now be scanned by default. If md devices with end-of-device superblocks are not being used, the extra I/O overhead can be avoided by setting md_component_checks="start". --- lib/cache/lvmcache.c | 7 +++- lib/commands/toolcontext.h | 2 + lib/config/config_settings.h | 25 +++++++++++- lib/config/defaults.h | 2 + lib/device/dev-md.c | 5 ++- lib/device/dev-type.c | 5 ++- lib/device/device.h | 1 + lib/label/label.c | 27 +++++++++++++ test/shell/lvm-on-md.sh | 75 +++++++++++++++++++++++++----------- tools/lvmcmdline.c | 55 ++++++++++++++++++++------ 10 files changed, 166 insertions(+), 38 deletions(-) diff --git a/lib/cache/lvmcache.c b/lib/cache/lvmcache.c index 8a29bf444..b62e5e24d 100644 --- a/lib/cache/lvmcache.c +++ b/lib/cache/lvmcache.c @@ -555,8 +555,11 @@ next: */ if (!(info = lvmcache_info_from_pvid(alt->dev->pvid, NULL, 0))) { - /* This shouldn't happen */ - log_warn("WARNING: PV %s on duplicate device %s not found in cache.", + /* + * This will happen if a duplicate dev has been dropped already, + * e.g. it was found to be an md component. + */ + log_debug("PVID %s on duplicate device %s not found in cache.", alt->dev->pvid, dev_name(alt->dev)); goto next; } diff --git a/lib/commands/toolcontext.h b/lib/commands/toolcontext.h index 0ea213286..7d373abb3 100644 --- a/lib/commands/toolcontext.h +++ b/lib/commands/toolcontext.h @@ -172,6 +172,7 @@ struct cmd_context { unsigned pvscan_cache_single:1; unsigned can_use_one_scan:1; unsigned is_clvmd:1; + unsigned md_component_detection:1; unsigned use_full_md_check:1; unsigned is_activating:1; unsigned enable_hints:1; /* hints are enabled for cmds in general */ @@ -184,6 +185,7 @@ struct cmd_context { */ struct dev_filter *filter; struct dm_list hints; + const char *md_component_checks; /* * Configuration. diff --git a/lib/config/config_settings.h b/lib/config/config_settings.h index efa86e779..e718decd9 100644 --- a/lib/config/config_settings.h +++ b/lib/config/config_settings.h @@ -368,7 +368,30 @@ cfg(devices_multipath_component_detection_CFG, "multipath_component_detection", "Ignore devices that are components of DM multipath devices.\n") cfg(devices_md_component_detection_CFG, "md_component_detection", devices_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_MD_COMPONENT_DETECTION, vsn(1, 0, 18), NULL, 0, NULL, - "Ignore devices that are components of software RAID (md) devices.\n") + "Enable detection and exclusion of MD component devices.\n" + "An MD component device is a block device that MD uses as part\n" + "of a software RAID virtual device. When an LVM PV is created\n" + "on an MD device, LVM must only use the top level MD device as\n" + "the PV, and should ignore the underlying component devices.\n" + "In cases where the MD superblock is located at the end of the\n" + "component devices, it is more difficult for LVM to consistently\n" + "identify an MD component, see the md_component_checks setting.\n") + +cfg(devices_md_component_checks_CFG, "md_component_checks", devices_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_MD_COMPONENT_CHECKS, vsn(2, 3, 2), NULL, 0, NULL, + "The checks LVM should use to detect MD component devices.\n" + "MD component devices are block devices used by MD software RAID.\n" + "#\n" + "Accepted values:\n" + " auto\n" + " LVM will skip scanning the end of devices when it has other\n" + " indications that the device is not an MD component.\n" + " start\n" + " LVM will only scan the start of devices for MD superblocks.\n" + " This does not incur extra I/O by LVM.\n" + " full\n" + " LVM will scan the start and end of devices for MD superblocks.\n" + " This requires an extra read at the end of devices.\n" + "#\n") cfg(devices_fw_raid_component_detection_CFG, "fw_raid_component_detection", devices_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_FW_RAID_COMPONENT_DETECTION, vsn(2, 2, 112), NULL, 0, NULL, "Ignore devices that are components of firmware RAID devices.\n" diff --git a/lib/config/defaults.h b/lib/config/defaults.h index fac5b75a1..6a0d686dc 100644 --- a/lib/config/defaults.h +++ b/lib/config/defaults.h @@ -318,4 +318,6 @@ #define DEFAULT_IO_MEMORY_SIZE_KB 8192 +#define DEFAULT_MD_COMPONENT_CHECKS "auto" + #endif /* _LVM_DEFAULTS_H */ diff --git a/lib/device/dev-md.c b/lib/device/dev-md.c index 261f3f1e2..08143b7fa 100644 --- a/lib/device/dev-md.c +++ b/lib/device/dev-md.c @@ -110,14 +110,17 @@ static int _udev_dev_is_md_component(struct device *dev) if (!(ext = dev_ext_get(dev))) return_0; - if (!(value = udev_device_get_property_value((struct udev_device *)ext->handle, DEV_EXT_UDEV_BLKID_TYPE))) + if (!(value = udev_device_get_property_value((struct udev_device *)ext->handle, DEV_EXT_UDEV_BLKID_TYPE))) { + dev->flags |= DEV_UDEV_INFO_MISSING; return 0; + } return !strcmp(value, DEV_EXT_UDEV_BLKID_TYPE_SW_RAID); } #else static int _udev_dev_is_md_component(struct device *dev) { + dev->flags |= DEV_UDEV_INFO_MISSING; return 0; } #endif diff --git a/lib/device/dev-type.c b/lib/device/dev-type.c index 73e55cabc..4e35220c7 100644 --- a/lib/device/dev-type.c +++ b/lib/device/dev-type.c @@ -1170,8 +1170,10 @@ int udev_dev_is_md_component(struct device *dev) const char *value; int ret = 0; - if (!obtain_device_list_from_udev()) + if (!obtain_device_list_from_udev()) { + dev->flags |= DEV_UDEV_INFO_MISSING; return 0; + } if (!(udev_device = _udev_get_dev(dev))) return 0; @@ -1198,6 +1200,7 @@ int udev_dev_is_mpath_component(struct device *dev) int udev_dev_is_md_component(struct device *dev) { + dev->flags |= DEV_UDEV_INFO_MISSING; return 0; } diff --git a/lib/device/device.h b/lib/device/device.h index 395405ab3..30e1e79b3 100644 --- a/lib/device/device.h +++ b/lib/device/device.h @@ -37,6 +37,7 @@ #define DEV_BCACHE_WRITE 0x00008000 /* bcache_fd is open with RDWR */ #define DEV_SCAN_FOUND_LABEL 0x00010000 /* label scan read dev and found label */ #define DEV_IS_MD_COMPONENT 0x00020000 /* device is an md component */ +#define DEV_UDEV_INFO_MISSING 0x00040000 /* we have no udev info for this device */ /* * Support for external device info. diff --git a/lib/label/label.c b/lib/label/label.c index 10c1c4c0f..f6ba2d84d 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -1022,6 +1022,33 @@ int label_scan(struct cmd_context *cmd) } } + /* + * Stronger exclusion of md components that might have been + * misidentified as PVs due to having an end-of-device md superblock. + * If we're not using hints, and are not already doing a full md check + * on devs being scanned, then if udev info is missing for a PV, scan + * the end of the PV to verify it's not an md component. The full + * dev_is_md_component call will do new reads at the end of the dev. + */ + if (cmd->md_component_detection && !cmd->use_full_md_check && !using_hints && + !strcmp(cmd->md_component_checks, "auto")) { + int once = 0; + dm_list_iterate_items(devl, &scan_devs) { + if (!(devl->dev->flags & DEV_SCAN_FOUND_LABEL)) + continue; + if (!(devl->dev->flags & DEV_UDEV_INFO_MISSING)) + continue; + if (!once++) + log_debug_devs("Scanning end of PVs with no udev info for MD components"); + + if (dev_is_md_component(devl->dev, NULL, 1)) { + log_debug_devs("Drop PV from MD component %s", dev_name(devl->dev)); + devl->dev->flags &= ~DEV_SCAN_FOUND_LABEL; + lvmcache_del_dev(devl->dev); + } + } + } + dm_list_iterate_items_safe(devl, devl2, &all_devs) { dm_list_del(&devl->list); free(devl); diff --git a/test/shell/lvm-on-md.sh b/test/shell/lvm-on-md.sh index 41649bea7..22cbee84a 100644 --- a/test/shell/lvm-on-md.sh +++ b/test/shell/lvm-on-md.sh @@ -31,6 +31,10 @@ test -f /proc/mdstat && grep -q raid1 /proc/mdstat || \ aux lvmconf 'devices/md_component_detection = 1' +# This stops lvm from taking advantage of hints which +# will have already excluded md components. +aux lvmconf 'devices/hints = "none"' + # This stops lvm from asking udev if a dev is an md component. # LVM will ask udev if a dev is an md component, but we don't # want to rely on that ability in this test. @@ -61,6 +65,10 @@ check lv_field $vg/$lv1 lv_active "active" pvs "$mddev" not pvs "$dev1" not pvs "$dev2" +pvs > out +not grep "$dev1" out +not grep "$dev2" out + sleep 1 vgchange -an $vg @@ -72,12 +80,14 @@ sleep 1 mdadm --stop "$mddev" aux udev_wait -# with md superblock 1.0 this pvs will report duplicates -# for the two md legs since the md device itself is not -# started -pvs 2>&1 |tee out -cat out -grep "prefers device" out +# The md components should still be detected and excluded. +not pvs "$dev1" +not pvs "$dev2" +pvs > out +not grep "$dev1" out +not grep "$dev2" out + +pvs -vvvv # should not activate from the md legs not vgchange -ay $vg @@ -104,16 +114,20 @@ not grep "active" out mdadm --assemble "$mddev" "$dev1" "$dev2" aux udev_wait -# Now that the md dev is online, pvs can see it and -# ignore the two legs, so there's no duplicate warning +# Now that the md dev is online, pvs can see it +# and check for components even if +# md_component_checks is "start" (which disables +# most default end-of-device scans) +aux lvmconf 'devices/md_component_checks = "start"' -pvs 2>&1 |tee out -cat out -not grep "prefers device" out +not pvs "$dev1" +not pvs "$dev2" +pvs > out +not grep "$dev1" out +not grep "$dev2" out -vgchange -ay $vg 2>&1 |tee out -cat out -not grep "prefers device" out + +vgchange -ay $vg check lv_field $vg/$lv1 lv_active "active" @@ -124,6 +138,9 @@ vgremove -f $vg aux cleanup_md_dev +# Put this setting back to the default +aux lvmconf 'devices/md_component_checks = "auto"' + # create 2 disk MD raid0 array # by default using metadata format 1.0 with data at the end of device # When a raid0 md array is stopped, the components will not look like @@ -149,6 +166,10 @@ check lv_field $vg/$lv1 lv_active "active" pvs "$mddev" not pvs "$dev1" not pvs "$dev2" +pvs > out +not grep "$dev1" out +not grep "$dev2" out + sleep 1 vgchange -an $vg @@ -160,7 +181,14 @@ sleep 1 mdadm --stop "$mddev" aux udev_wait -pvs 2>&1 |tee pvs.out +# The md components should still be detected and excluded. +not pvs "$dev1" +not pvs "$dev2" +pvs > out +not grep "$dev1" out +not grep "$dev2" out + +pvs -vvvv # should not activate from the md legs not vgchange -ay $vg @@ -187,16 +215,19 @@ not grep "active" out mdadm --assemble "$mddev" "$dev1" "$dev2" aux udev_wait -# Now that the md dev is online, pvs can see it and -# ignore the two legs, so there's no duplicate warning +# Now that the md dev is online, pvs can see it +# and check for components even if +# md_component_checks is "start" (which disables +# most default end-of-device scans) +aux lvmconf 'devices/md_component_checks = "start"' -pvs 2>&1 |tee out -cat out -not grep "prefers device" out +not pvs "$dev1" +not pvs "$dev2" +pvs > out +not grep "$dev1" out +not grep "$dev2" out vgchange -ay $vg 2>&1 |tee out -cat out -not grep "prefers device" out check lv_field $vg/$lv1 lv_active "active" diff --git a/tools/lvmcmdline.c b/tools/lvmcmdline.c index 79de85bfb..30f54e6a7 100644 --- a/tools/lvmcmdline.c +++ b/tools/lvmcmdline.c @@ -2766,20 +2766,53 @@ static int _init_lvmlockd(struct cmd_context *cmd) return 1; } +/* + * md_component_check full: always set use_full_md_check + * which causes filter-md to read the start+end of every + * device on the system (this could be optimized to only + * read the end of PVs.) + * + * md_component_check start: the end of devices will + * not generally be read to check for an md superblock + * (lvm may still scan for end-of-device md superblocks + * if it knows that some exists.) + * + * md_component_check auto: lvm will use some built-in + * heuristics to decide when it should scan the end of + * devices to look for md superblocks, e.g. commands + * like pvcreate that could clobber a component, or if + * udev info is not available and hints are not available. + */ static void _init_md_checks(struct cmd_context *cmd) { - /* - * use_full_md_check can also be set later. - * These commands are chosen to always perform - * a full md component check because they initialize - * a new device that could be an md component, - * and they are not run frequently during normal - * operation. - */ - if (!strcmp(cmd->name, "pvcreate") || - !strcmp(cmd->name, "vgcreate") || - !strcmp(cmd->name, "vgextend")) + const char *md_check; + + cmd->md_component_detection = find_config_tree_bool(cmd, devices_md_component_detection_CFG, NULL); + + md_check = find_config_tree_str(cmd, devices_md_component_checks_CFG, NULL); + if (!md_check) + cmd->md_component_checks = "auto"; + else if (!strcmp(md_check, "auto") || + !strcmp(md_check, "start") || + !strcmp(md_check, "full")) + cmd->md_component_checks = md_check; + else { + log_warn("Ignoring unknown md_component_checks setting, using auto."); + cmd->md_component_checks = "auto"; + } + + if (!strcmp(cmd->md_component_checks, "full")) cmd->use_full_md_check = 1; + else if (!strcmp(cmd->md_component_checks, "auto")) { + /* use_full_md_check can also be set later */ + if (!strcmp(cmd->name, "pvcreate") || + !strcmp(cmd->name, "vgcreate") || + !strcmp(cmd->name, "vgextend")) + cmd->use_full_md_check = 1; + } + + log_debug("Using md_component_checks %s use_full_md_check %d", + cmd->md_component_checks, cmd->use_full_md_check); } static int _cmd_no_meta_proc(struct cmd_context *cmd) From aeafdc1f45a5963290a5bc3ea1661018b072d817 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 5 Feb 2019 12:08:00 -0600 Subject: [PATCH 02/22] add flags to keep track of bad metadata When reading metadata headers and text, use a new set of flags to identify specific errors that are seen. These will be used for more advanced repair in a subsequent commit. --- lib/format_text/format-text.c | 44 +++++++++++++++++++++++------------ lib/format_text/layout.h | 4 +++- lib/format_text/text_label.c | 3 ++- lib/metadata/metadata.h | 14 +++++++++++ 4 files changed, 48 insertions(+), 17 deletions(-) diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c index 2831a53bd..f5e08fa63 100644 --- a/lib/format_text/format-text.c +++ b/lib/format_text/format-text.c @@ -161,7 +161,8 @@ static void _xlate_mdah(struct mda_header *mdah) } } -static int _raw_read_mda_header(struct mda_header *mdah, struct device_area *dev_area, int primary_mda) +static int _raw_read_mda_header(struct mda_header *mdah, struct device_area *dev_area, + int primary_mda, uint32_t ignore_bad_fields, uint32_t *bad_fields) { log_debug_metadata("Reading mda header sector from %s at %llu", dev_name(dev_area->dev), (unsigned long long)dev_area->start); @@ -169,53 +170,62 @@ static int _raw_read_mda_header(struct mda_header *mdah, struct device_area *dev if (!dev_read_bytes(dev_area->dev, dev_area->start, MDA_HEADER_SIZE, mdah)) { log_error("Failed to read metadata area header on %s at %llu", dev_name(dev_area->dev), (unsigned long long)dev_area->start); + *bad_fields |= BAD_MDA_READ; return 0; } if (mdah->checksum_xl != xlate32(calc_crc(INITIAL_CRC, (uint8_t *)mdah->magic, MDA_HEADER_SIZE - sizeof(mdah->checksum_xl)))) { - log_error("Incorrect checksum in metadata area header on %s at %llu", + log_warn("WARNING: wrong checksum %x in mda header on %s at %llu", + mdah->checksum_xl, dev_name(dev_area->dev), (unsigned long long)dev_area->start); - return 0; + *bad_fields |= BAD_MDA_CHECKSUM; } _xlate_mdah(mdah); if (memcmp(mdah->magic, FMTT_MAGIC, sizeof(mdah->magic))) { - log_error("Wrong magic number in metadata area header on %s at %llu", + log_warn("WARNING: wrong magic number in mda header on %s at %llu", dev_name(dev_area->dev), (unsigned long long)dev_area->start); - return 0; + *bad_fields |= BAD_MDA_MAGIC; } if (mdah->version != FMTT_VERSION) { - log_error("Incompatible version %u metadata area header on %s at %llu", + log_warn("WARNING: wrong version %u in mda header on %s at %llu", mdah->version, dev_name(dev_area->dev), (unsigned long long)dev_area->start); - return 0; + *bad_fields |= BAD_MDA_VERSION; } if (mdah->start != dev_area->start) { - log_error("Incorrect start sector %llu in metadata area header on %s at %llu", + log_warn("WARNING: wrong start sector %llu in mda header on %s at %llu", (unsigned long long)mdah->start, dev_name(dev_area->dev), (unsigned long long)dev_area->start); - return 0; + *bad_fields |= BAD_MDA_START; } + *bad_fields &= ~ignore_bad_fields; + + if (*bad_fields) + return 0; + return 1; } struct mda_header *raw_read_mda_header(const struct format_type *fmt, - struct device_area *dev_area, int primary_mda) + struct device_area *dev_area, + int primary_mda, uint32_t ignore_bad_fields, uint32_t *bad_fields) { struct mda_header *mdah; if (!(mdah = dm_pool_alloc(fmt->cmd->mem, MDA_HEADER_SIZE))) { log_error("struct mda_header allocation failed"); + *bad_fields |= BAD_MDA_INTERNAL; return NULL; } - if (!_raw_read_mda_header(mdah, dev_area, primary_mda)) { + if (!_raw_read_mda_header(mdah, dev_area, primary_mda, ignore_bad_fields, bad_fields)) { dm_pool_free(fmt->cmd->mem, mdah); return NULL; } @@ -413,8 +423,9 @@ static struct volume_group *_vg_read_raw_area(struct format_instance *fid, time_t when; char *desc; uint32_t wrap = 0; + uint32_t bad_fields = 0; - if (!(mdah = raw_read_mda_header(fid->fmt, area, primary_mda))) { + if (!(mdah = raw_read_mda_header(fid->fmt, area, primary_mda, 0, &bad_fields))) { log_error("Failed to read vg %s from %s", vgname, dev_name(area->dev)); goto out; } @@ -535,6 +546,7 @@ static int _vg_write_raw(struct format_instance *fid, struct volume_group *vg, uint64_t old_start = 0, old_last = 0, old_size = 0, old_wrap = 0; uint64_t new_start = 0, new_last = 0, new_size = 0, new_wrap = 0; uint64_t max_size; + uint32_t bad_fields = 0; char *new_buf = NULL; int overlap; int found = 0; @@ -550,7 +562,7 @@ static int _vg_write_raw(struct format_instance *fid, struct volume_group *vg, if (!found) return 1; - if (!(mdah = raw_read_mda_header(fid->fmt, &mdac->area, mda_is_primary(mda)))) + if (!(mdah = raw_read_mda_header(fid->fmt, &mdac->area, mda_is_primary(mda), mda->ignore_bad_fields, &bad_fields))) goto_out; /* @@ -821,6 +833,7 @@ static int _vg_commit_raw_rlocn(struct format_instance *fid, struct raw_locn *rlocn_slot1; struct raw_locn *rlocn_new; struct pv_list *pvl; + uint32_t bad_fields = 0; int r = 0; int found = 0; @@ -841,7 +854,7 @@ static int _vg_commit_raw_rlocn(struct format_instance *fid, * mdah buffer, but the mdah buffer is not modified and mdac->rlocn is * modified. */ - if (!(mdab = raw_read_mda_header(fid->fmt, &mdac->area, mda_is_primary(mda)))) + if (!(mdab = raw_read_mda_header(fid->fmt, &mdac->area, mda_is_primary(mda), mda->ignore_bad_fields, &bad_fields))) goto_out; /* @@ -1033,6 +1046,7 @@ static int _vg_remove_raw(struct format_instance *fid, struct volume_group *vg, struct mda_header *mdah; struct raw_locn *rlocn_slot0; struct raw_locn *rlocn_slot1; + uint32_t bad_fields = 0; int r = 0; if (!(mdah = dm_pool_alloc(fid->fmt->cmd->mem, MDA_HEADER_SIZE))) { @@ -1046,7 +1060,7 @@ static int _vg_remove_raw(struct format_instance *fid, struct volume_group *vg, * Just to print the warning? */ - if (!_raw_read_mda_header(mdah, &mdac->area, mda_is_primary(mda))) + if (!_raw_read_mda_header(mdah, &mdac->area, mda_is_primary(mda), 0, &bad_fields)) log_warn("WARNING: Removing metadata location on %s with bad mda header.", dev_name(mdac->area.dev)); diff --git a/lib/format_text/layout.h b/lib/format_text/layout.h index e1462f172..7320d9c2f 100644 --- a/lib/format_text/layout.h +++ b/lib/format_text/layout.h @@ -81,7 +81,9 @@ struct mda_header { } __attribute__ ((packed)); struct mda_header *raw_read_mda_header(const struct format_type *fmt, - struct device_area *dev_area, int primary_mda); + struct device_area *dev_area, int primary_mda, + uint32_t ignore_bad_fields, + uint32_t *bad_fields); struct mda_lists { struct metadata_area_ops *file_ops; diff --git a/lib/format_text/text_label.c b/lib/format_text/text_label.c index ef2f6c74f..6eca3c110 100644 --- a/lib/format_text/text_label.c +++ b/lib/format_text/text_label.c @@ -331,8 +331,9 @@ static int _read_mda_header_and_metadata(struct metadata_area *mda, void *baton) struct mda_context *mdac = (struct mda_context *) mda->metadata_locn; struct mda_header *mdah; struct lvmcache_vgsummary vgsummary = { 0 }; + uint32_t bad_fields = 0; - if (!(mdah = raw_read_mda_header(fmt, &mdac->area, mda_is_primary(mda)))) { + if (!(mdah = raw_read_mda_header(fmt, &mdac->area, mda_is_primary(mda), 0, &bad_fields))) { log_error("Failed to read mda header from %s", dev_name(mdac->area.dev)); goto fail; } diff --git a/lib/metadata/metadata.h b/lib/metadata/metadata.h index a140c29f3..d904aa642 100644 --- a/lib/metadata/metadata.h +++ b/lib/metadata/metadata.h @@ -168,11 +168,25 @@ struct metadata_area_ops { #define MDA_CONTENT_REASON(primary_mda) ((primary_mda) ? DEV_IO_MDA_CONTENT : DEV_IO_MDA_EXTRA_CONTENT) #define MDA_HEADER_REASON(primary_mda) ((primary_mda) ? DEV_IO_MDA_HEADER : DEV_IO_MDA_EXTRA_HEADER) +/* + * Flags describing errors found while reading. + */ +#define BAD_MDA_INTERNAL 0x00000001 /* internal lvm error */ +#define BAD_MDA_READ 0x00000002 /* read io failed */ +#define BAD_MDA_HEADER 0x00000004 /* general problem with header */ +#define BAD_MDA_TEXT 0x00000008 /* general problem with text */ +#define BAD_MDA_CHECKSUM 0x00000010 +#define BAD_MDA_MAGIC 0x00000020 +#define BAD_MDA_VERSION 0x00000040 +#define BAD_MDA_START 0x00000080 + struct metadata_area { struct dm_list list; struct metadata_area_ops *ops; void *metadata_locn; uint32_t status; + uint32_t bad_fields; /* BAD_MDA_ flags are set to indicate errors found when reading */ + uint32_t ignore_bad_fields; /* BAD_MDA_ flags are set to indicate errors to ignore */ }; struct metadata_area *mda_copy(struct dm_pool *mem, struct metadata_area *mda); From 650524b9559bb56339b281af52db9f4df91038ae Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 5 Feb 2019 12:39:08 -0600 Subject: [PATCH 03/22] ability to keep track of bad mdas in lvmcache mda's that cannot be processed by lvm because of some corruption can be kept on a separate list. These will be used for more advanced repair in a subsequent commit. --- lib/cache/lvmcache.c | 55 +++++++++++++++++++++++++++++++++++++++++ lib/cache/lvmcache.h | 8 ++++++ lib/metadata/metadata.h | 3 ++- 3 files changed, 65 insertions(+), 1 deletion(-) diff --git a/lib/cache/lvmcache.c b/lib/cache/lvmcache.c index b62e5e24d..a36e14535 100644 --- a/lib/cache/lvmcache.c +++ b/lib/cache/lvmcache.c @@ -31,6 +31,7 @@ struct lvmcache_info { struct dm_list mdas; /* list head for metadata areas */ struct dm_list das; /* list head for data areas */ struct dm_list bas; /* list head for bootloader areas */ + struct dm_list bad_mdas;/* list head for bad metadata areas */ struct lvmcache_vginfo *vginfo; /* NULL == unknown */ struct label *label; const struct format_type *fmt; @@ -39,6 +40,8 @@ struct lvmcache_info { uint32_t ext_version; /* Extension version */ uint32_t ext_flags; /* Extension flags */ uint32_t status; + bool mda1_bad; /* label scan found bad metadata in mda1 */ + bool mda2_bad; /* label scan found bad metadata in mda2 */ }; /* One per VG */ @@ -175,6 +178,54 @@ static void _destroy_duplicate_device_list(struct dm_list *head) dm_list_init(head); } +bool lvmcache_has_bad_metadata(struct device *dev) +{ + struct lvmcache_info *info; + + if (!(info = lvmcache_info_from_pvid(dev->pvid, dev, 0))) { + /* shouldn't happen */ + log_error("No lvmcache info for checking bad metadata on %s", dev_name(dev)); + return false; + } + + if (info->mda1_bad || info->mda2_bad) + return true; + return false; +} + +void lvmcache_save_bad_mda(struct lvmcache_info *info, struct metadata_area *mda) +{ + if (mda->mda_num == 1) + info->mda1_bad = true; + else if (mda->mda_num == 2) + info->mda2_bad = true; + dm_list_add(&info->bad_mdas, &mda->list); +} + +void lvmcache_get_bad_mdas(struct cmd_context *cmd, + const char *vgname, const char *vgid, + struct dm_list *bad_mda_list) +{ + struct lvmcache_vginfo *vginfo; + struct lvmcache_info *info; + struct mda_list *mdal; + struct metadata_area *mda, *mda2; + + if (!(vginfo = lvmcache_vginfo_from_vgname(vgname, vgid))) { + log_error(INTERNAL_ERROR "lvmcache_get_bad_mdas no vginfo %s", vgname); + return; + } + + dm_list_iterate_items(info, &vginfo->infos) { + dm_list_iterate_items_safe(mda, mda2, &info->bad_mdas) { + if (!(mdal = zalloc(sizeof(*mdal)))) + continue; + mdal->mda = mda; + dm_list_add(bad_mda_list, &mdal->list); + } + } +} + static void _vginfo_attach_info(struct lvmcache_vginfo *vginfo, struct lvmcache_info *info) { @@ -1945,6 +1996,10 @@ void lvmcache_del_mdas(struct lvmcache_info *info) if (info->mdas.n) del_mdas(&info->mdas); dm_list_init(&info->mdas); + + if (info->bad_mdas.n) + del_mdas(&info->bad_mdas); + dm_list_init(&info->bad_mdas); } void lvmcache_del_das(struct lvmcache_info *info) diff --git a/lib/cache/lvmcache.h b/lib/cache/lvmcache.h index 5e59b948d..dd6730e3f 100644 --- a/lib/cache/lvmcache.h +++ b/lib/cache/lvmcache.h @@ -220,4 +220,12 @@ void lvmcache_save_metadata_size(uint64_t val); int dev_in_device_list(struct device *dev, struct dm_list *head); +bool lvmcache_has_bad_metadata(struct device *dev); + +void lvmcache_save_bad_mda(struct lvmcache_info *info, struct metadata_area *mda); + +void lvmcache_get_bad_mdas(struct cmd_context *cmd, + const char *vgname, const char *vgid, + struct dm_list *bad_mda_list); + #endif diff --git a/lib/metadata/metadata.h b/lib/metadata/metadata.h index d904aa642..6d158fe8a 100644 --- a/lib/metadata/metadata.h +++ b/lib/metadata/metadata.h @@ -185,6 +185,7 @@ struct metadata_area { struct metadata_area_ops *ops; void *metadata_locn; uint32_t status; + int mda_num; uint32_t bad_fields; /* BAD_MDA_ flags are set to indicate errors found when reading */ uint32_t ignore_bad_fields; /* BAD_MDA_ flags are set to indicate errors to ignore */ }; @@ -248,7 +249,7 @@ struct name_list { struct mda_list { struct dm_list list; - struct device_area mda; + struct metadata_area *mda; }; struct peg_list { From 0b18c25d934564015402de33e15a267045ed1b8c Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 5 Feb 2019 12:55:51 -0600 Subject: [PATCH 04/22] ability to keep track of outdated pvs in lvmcache Outdated PVs hold metadata for VG from which they have been removed. Add the ability to keep track of these in lvmcache. This will be used for more advanced repair in a subsequent commit. --- lib/cache/lvmcache.c | 80 ++++++++++++++++++++++++++++++++++++++++++++ lib/cache/lvmcache.h | 15 +++++++++ 2 files changed, 95 insertions(+) diff --git a/lib/cache/lvmcache.c b/lib/cache/lvmcache.c index a36e14535..a86066015 100644 --- a/lib/cache/lvmcache.c +++ b/lib/cache/lvmcache.c @@ -48,6 +48,7 @@ struct lvmcache_info { struct lvmcache_vginfo { struct dm_list list; /* Join these vginfos together */ struct dm_list infos; /* List head for lvmcache_infos */ + struct dm_list outdated_infos; /* vg_read moves info from infos to outdated_infos */ const struct format_type *fmt; char *vgname; /* "" == orphan */ uint32_t status; @@ -1389,6 +1390,7 @@ static int _lvmcache_update_vgname(struct lvmcache_info *info, return 0; } dm_list_init(&vginfo->infos); + dm_list_init(&vginfo->outdated_infos); /* * A different VG (different uuid) can exist with the same name. @@ -2392,3 +2394,81 @@ struct metadata_area *lvmcache_get_mda(struct cmd_context *cmd, return NULL; } +void lvmcache_get_outdated_devs(struct cmd_context *cmd, + const char *vgname, const char *vgid, + struct dm_list *devs) +{ + struct lvmcache_vginfo *vginfo; + struct lvmcache_info *info; + struct device_list *devl; + + if (!(vginfo = lvmcache_vginfo_from_vgname(vgname, vgid))) { + log_error(INTERNAL_ERROR "lvmcache_get_outdated_devs no vginfo %s", vgname); + return; + } + + dm_list_iterate_items(info, &vginfo->outdated_infos) { + if (!(devl = zalloc(sizeof(*devl)))) + return; + devl->dev = info->dev; + dm_list_add(devs, &devl->list); + } +} + +void lvmcache_del_outdated_devs(struct cmd_context *cmd, + const char *vgname, const char *vgid) +{ + struct lvmcache_vginfo *vginfo; + struct lvmcache_info *info, *info2; + + if (!(vginfo = lvmcache_vginfo_from_vgname(vgname, vgid))) { + log_error(INTERNAL_ERROR "lvmcache_get_outdated_devs no vginfo"); + return; + } + + dm_list_iterate_items_safe(info, info2, &vginfo->outdated_infos) + lvmcache_del(info); +} + +void lvmcache_get_outdated_mdas(struct cmd_context *cmd, + const char *vgname, const char *vgid, + struct device *dev, + struct dm_list **mdas) +{ + struct lvmcache_vginfo *vginfo; + struct lvmcache_info *info; + + *mdas = NULL; + + if (!(vginfo = lvmcache_vginfo_from_vgname(vgname, vgid))) { + log_error(INTERNAL_ERROR "lvmcache_get_outdated_mdas no vginfo"); + return; + } + + dm_list_iterate_items(info, &vginfo->outdated_infos) { + if (info->dev != dev) + continue; + *mdas = &info->mdas; + return; + } +} + +bool lvmcache_is_outdated_dev(struct cmd_context *cmd, + const char *vgname, const char *vgid, + struct device *dev) +{ + struct lvmcache_vginfo *vginfo; + struct lvmcache_info *info; + + if (!(vginfo = lvmcache_vginfo_from_vgname(vgname, vgid))) { + log_error(INTERNAL_ERROR "lvmcache_get_outdated_mdas no vginfo"); + return false; + } + + dm_list_iterate_items(info, &vginfo->outdated_infos) { + if (info->dev == dev) + return true; + } + + return false; +} diff --git a/lib/cache/lvmcache.h b/lib/cache/lvmcache.h index dd6730e3f..0f83c80aa 100644 --- a/lib/cache/lvmcache.h +++ b/lib/cache/lvmcache.h @@ -222,6 +222,21 @@ int dev_in_device_list(struct device *dev, struct dm_list *head); bool lvmcache_has_bad_metadata(struct device *dev); +void lvmcache_get_outdated_devs(struct cmd_context *cmd, + const char *vgname, const char *vgid, + struct dm_list *devs); +void lvmcache_get_outdated_mdas(struct cmd_context *cmd, + const char *vgname, const char *vgid, + struct device *dev, + struct dm_list **mdas); + +bool lvmcache_is_outdated_dev(struct cmd_context *cmd, + const char *vgname, const char *vgid, + struct device *dev); + +void lvmcache_del_outdated_devs(struct cmd_context *cmd, + const char *vgname, const char *vgid); + void lvmcache_save_bad_mda(struct lvmcache_info *info, struct metadata_area *mda); void lvmcache_get_bad_mdas(struct cmd_context *cmd, From b2447e3538db370d1e12d328c25ca1f078ddabf6 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 5 Feb 2019 13:09:56 -0600 Subject: [PATCH 05/22] keep track of which mdas have old metadata in lvmcache This will be used for more advanced repair in a subsequent commit. --- lib/cache/lvmcache.c | 179 ++++++++++++++++++++++++++++++++++--------- lib/cache/lvmcache.h | 10 ++- 2 files changed, 148 insertions(+), 41 deletions(-) diff --git a/lib/cache/lvmcache.c b/lib/cache/lvmcache.c index a86066015..f54eff0c9 100644 --- a/lib/cache/lvmcache.c +++ b/lib/cache/lvmcache.c @@ -42,6 +42,10 @@ struct lvmcache_info { uint32_t status; bool mda1_bad; /* label scan found bad metadata in mda1 */ bool mda2_bad; /* label scan found bad metadata in mda2 */ + bool summary_seqno_mismatch; /* two mdas on this dev has mismatching metadata */ + int summary_seqno; /* vg seqno found on this dev during scan */ + int mda1_seqno; + int mda2_seqno; }; /* One per VG */ @@ -61,7 +65,7 @@ struct lvmcache_vginfo { uint32_t mda_checksum; size_t mda_size; int seqno; - int scan_summary_mismatch; /* vgsummary from devs had mismatching seqno or checksum */ + bool scan_summary_mismatch; /* vgsummary from devs had mismatching seqno or checksum */ }; static struct dm_hash_table *_pvid_hash = NULL; @@ -1515,12 +1519,9 @@ int lvmcache_add_orphan_vginfo(const char *vgname, struct format_type *fmt) } /* - * FIXME: get rid of other callers of this function which call it - * in odd cases to "fix up" some bit of lvmcache state. Make those - * callers fix up what they need to directly, and leave this function - * with one purpose and caller. + * Returning 0 causes the caller to remove the info struct for this + * device from lvmcache, which will make it look like a missing device. */ - int lvmcache_update_vgname_and_id(struct lvmcache_info *info, struct lvmcache_vgsummary *vgsummary) { const char *vgname = vgsummary->vgname; @@ -1546,6 +1547,7 @@ int lvmcache_update_vgname_and_id(struct lvmcache_info *info, struct lvmcache_vg * Puts the vginfo into the vgname hash table. */ if (!_lvmcache_update_vgname(info, vgname, vgid, vgsummary->vgstatus, vgsummary->creation_host, info->fmt)) { + /* shouldn't happen, internal error */ log_error("Failed to update VG %s info in lvmcache.", vgname); return 0; } @@ -1554,6 +1556,7 @@ int lvmcache_update_vgname_and_id(struct lvmcache_info *info, struct lvmcache_vg * Puts the vginfo into the vgid hash table. */ if (!_lvmcache_update_vgid(info, info->vginfo, vgid)) { + /* shouldn't happen, internal error */ log_error("Failed to update VG %s info in lvmcache.", vgname); return 0; } @@ -1569,50 +1572,114 @@ int lvmcache_update_vgname_and_id(struct lvmcache_info *info, struct lvmcache_vg if (!vgsummary->seqno && !vgsummary->mda_size && !vgsummary->mda_checksum) return 1; + /* + * Keep track of which devs/mdas have old versions of the metadata. + * The values we keep in vginfo are from the metadata with the largest + * seqno. One dev may have more recent metadata than another dev, and + * one mda may have more recent metadata than the other mda on the same + * device. + * + * When a device holds old metadata, the info struct for the device + * remains in lvmcache, so the device is not treated as missing. + * Also the mda struct containing the old metadata is kept on + * info->mdas. This means that vg_read will read metadata from + * the mda again (and probably see the same old metadata). It + * also means that vg_write will use the mda to write new metadata + * into the mda that currently has the old metadata. + */ + if (vgsummary->mda_num == 1) + info->mda1_seqno = vgsummary->seqno; + else if (vgsummary->mda_num == 2) + info->mda2_seqno = vgsummary->seqno; + + if (!info->summary_seqno) + info->summary_seqno = vgsummary->seqno; + else { + if (info->summary_seqno == vgsummary->seqno) { + /* This mda has the same metadata as the prev mda on this dev. */ + return 1; + + } else if (info->summary_seqno > vgsummary->seqno) { + /* This mda has older metadata than the prev mda on this dev. */ + info->summary_seqno_mismatch = true; + + } else if (info->summary_seqno < vgsummary->seqno) { + /* This mda has newer metadata than the prev mda on this dev. */ + info->summary_seqno_mismatch = true; + info->summary_seqno = vgsummary->seqno; + } + } + + /* this shouldn't happen */ if (!(vginfo = info->vginfo)) return 1; if (!vginfo->seqno) { vginfo->seqno = vgsummary->seqno; - - log_debug_cache("lvmcache %s: VG %s: set seqno to %d", - dev_name(info->dev), vginfo->vgname, vginfo->seqno); - - } else if (vgsummary->seqno != vginfo->seqno) { - log_warn("Scan of VG %s from %s found metadata seqno %d vs previous %d.", - vgname, dev_name(info->dev), vgsummary->seqno, vginfo->seqno); - vginfo->scan_summary_mismatch = 1; - /* If we don't return success, this dev info will be removed from lvmcache, - and then we won't be able to rescan it or repair it. */ - return 1; - } - - if (!vginfo->mda_size) { vginfo->mda_checksum = vgsummary->mda_checksum; vginfo->mda_size = vgsummary->mda_size; - log_debug_cache("lvmcache %s: VG %s: set mda_checksum to %x mda_size to %zu", - dev_name(info->dev), vginfo->vgname, - vginfo->mda_checksum, vginfo->mda_size); + log_debug_cache("lvmcache %s mda%d VG %s set seqno %u checksum %x mda_size %zu", + dev_name(info->dev), vgsummary->mda_num, vgname, + vgsummary->seqno, vgsummary->mda_checksum, vgsummary->mda_size); + goto update_vginfo; - } else if ((vginfo->mda_size != vgsummary->mda_size) || (vginfo->mda_checksum != vgsummary->mda_checksum)) { - log_warn("Scan of VG %s from %s found mda_checksum %x mda_size %zu vs previous %x %zu", - vgname, dev_name(info->dev), vgsummary->mda_checksum, vgsummary->mda_size, - vginfo->mda_checksum, vginfo->mda_size); - vginfo->scan_summary_mismatch = 1; - /* If we don't return success, this dev info will be removed from lvmcache, - and then we won't be able to rescan it or repair it. */ + } else if (vgsummary->seqno < vginfo->seqno) { + vginfo->scan_summary_mismatch = true; + + log_debug_cache("lvmcache %s mda%d VG %s older seqno %u checksum %x mda_size %zu", + dev_name(info->dev), vgsummary->mda_num, vgname, + vgsummary->seqno, vgsummary->mda_checksum, vgsummary->mda_size); + return 1; + + } else if (vgsummary->seqno > vginfo->seqno) { + vginfo->scan_summary_mismatch = true; + + /* Replace vginfo values with values from newer metadata. */ + vginfo->seqno = vgsummary->seqno; + vginfo->mda_checksum = vgsummary->mda_checksum; + vginfo->mda_size = vgsummary->mda_size; + + log_debug_cache("lvmcache %s mda%d VG %s newer seqno %u checksum %x mda_size %zu", + dev_name(info->dev), vgsummary->mda_num, vgname, + vgsummary->seqno, vgsummary->mda_checksum, vgsummary->mda_size); + + goto update_vginfo; + } else { + /* + * Same seqno as previous metadata we saw for this VG. + * If the metadata somehow has a different checksum or size, + * even though it has the same seqno, something has gone wrong. + * FIXME: test this case: VG has two PVs, first goes missing, + * second updated to seqno 4, first comes back and second goes + * missing, first updated to seqno 4, second comes back, now + * both are present with same seqno but different checksums. + */ + + if ((vginfo->mda_size != vgsummary->mda_size) || (vginfo->mda_checksum != vgsummary->mda_checksum)) { + log_warn("WARNING: scan of VG %s from %s mda%d found mda_checksum %x mda_size %zu vs %x %zu", + vgname, dev_name(info->dev), vgsummary->mda_num, + vgsummary->mda_checksum, vgsummary->mda_size, + vginfo->mda_checksum, vginfo->mda_size); + vginfo->scan_summary_mismatch = true; + return 0; + } + + /* + * The seqno and checksum matches what was previously seen; + * the summary values have already been saved in vginfo. + */ return 1; } - /* - * If a dev has an unmatching checksum, ignore the other - * info from it, keeping the info we already saved. - */ + update_vginfo: if (!_lvmcache_update_vgstatus(info, vgsummary->vgstatus, vgsummary->creation_host, vgsummary->lock_type, vgsummary->system_id)) { + /* + * This shouldn't happen, it's an internal errror, and we can leave + * the info in place without saving the summary values in vginfo. + */ log_error("Failed to update VG %s info in lvmcache.", vgname); - return 0; } return 1; @@ -2325,17 +2392,17 @@ int lvmcache_vg_is_foreign(struct cmd_context *cmd, const char *vgname, const ch * lvmcache: info for dev5 is deleted, FIXME: use a defective state */ -int lvmcache_scan_mismatch(struct cmd_context *cmd, const char *vgname, const char *vgid) +bool lvmcache_scan_mismatch(struct cmd_context *cmd, const char *vgname, const char *vgid) { struct lvmcache_vginfo *vginfo; if (!vgname || !vgid) - return 1; + return true; if ((vginfo = lvmcache_vginfo_from_vgid(vgid))) return vginfo->scan_summary_mismatch; - return 1; + return true; } static uint64_t _max_metadata_size; @@ -2394,6 +2461,42 @@ struct metadata_area *lvmcache_get_mda(struct cmd_context *cmd, return NULL; } +/* + * This is used by the metadata repair command to check if + * the metadata on a dev needs repair because it's old. + */ +bool lvmcache_has_old_metadata(struct cmd_context *cmd, const char *vgname, const char *vgid, struct device *dev) +{ + struct lvmcache_vginfo *vginfo; + struct lvmcache_info *info; + + /* shouldn't happen */ + if (!vgname || !vgid) + return false; + + /* shouldn't happen */ + if (!(vginfo = lvmcache_vginfo_from_vgid(vgid))) + return false; + + /* shouldn't happen */ + if (!(info = lvmcache_info_from_pvid(dev->pvid, NULL, 0))) + return false; + + /* writing to a new PV */ + if (!info->summary_seqno) + return false; + + /* on same dev, one mda has newer metadata than the other */ + if (info->summary_seqno_mismatch) + return true; + + /* one or both mdas on this dev has older metadata than another dev */ + if (vginfo->seqno > info->summary_seqno) + return true; + + return false; +} + void lvmcache_get_outdated_devs(struct cmd_context *cmd, const char *vgname, const char *vgid, struct dm_list *devs) diff --git a/lib/cache/lvmcache.h b/lib/cache/lvmcache.h index 0f83c80aa..16cbb4874 100644 --- a/lib/cache/lvmcache.h +++ b/lib/cache/lvmcache.h @@ -57,10 +57,12 @@ struct lvmcache_vgsummary { char *creation_host; const char *system_id; const char *lock_type; + uint32_t seqno; uint32_t mda_checksum; size_t mda_size; - int zero_offset; - int seqno; + int mda_num; /* 1 = summary from mda1, 2 = summary from mda2 */ + unsigned mda_ignored:1; + unsigned zero_offset:1; }; int lvmcache_init(struct cmd_context *cmd); @@ -202,7 +204,7 @@ int lvmcache_get_vg_devs(struct cmd_context *cmd, struct dm_list *devs); void lvmcache_set_independent_location(const char *vgname); -int lvmcache_scan_mismatch(struct cmd_context *cmd, const char *vgname, const char *vgid); +bool lvmcache_scan_mismatch(struct cmd_context *cmd, const char *vgname, const char *vgid); int lvmcache_vginfo_has_pvid(struct lvmcache_vginfo *vginfo, char *pvid); @@ -222,6 +224,8 @@ int dev_in_device_list(struct device *dev, struct dm_list *head); bool lvmcache_has_bad_metadata(struct device *dev); +bool lvmcache_has_old_metadata(struct cmd_context *cmd, const char *vgname, const char *vgid, struct device *dev); + void lvmcache_get_outdated_devs(struct cmd_context *cmd, const char *vgname, const char *vgid, struct dm_list *devs); From 889b5d3183314985d4d0930b30f0cd4b0f482f69 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 5 Feb 2019 13:24:23 -0600 Subject: [PATCH 06/22] add mda arg to add_mda Allow the caller of lvmcache_add_mda() to have the new mda returned. --- lib/cache/lvmcache.c | 5 +++-- lib/cache/lvmcache.h | 3 ++- lib/format_text/format-text.c | 2 +- lib/format_text/format-text.h | 3 ++- lib/format_text/text_label.c | 7 +++++-- 5 files changed, 13 insertions(+), 7 deletions(-) diff --git a/lib/cache/lvmcache.c b/lib/cache/lvmcache.c index f54eff0c9..3e89b0288 100644 --- a/lib/cache/lvmcache.c +++ b/lib/cache/lvmcache.c @@ -2086,9 +2086,10 @@ void lvmcache_del_bas(struct lvmcache_info *info) } int lvmcache_add_mda(struct lvmcache_info *info, struct device *dev, - uint64_t start, uint64_t size, unsigned ignored) + uint64_t start, uint64_t size, unsigned ignored, + struct metadata_area **mda_new) { - return add_mda(info->fmt, NULL, &info->mdas, dev, start, size, ignored); + return add_mda(info->fmt, NULL, &info->mdas, dev, start, size, ignored, mda_new); } int lvmcache_add_da(struct lvmcache_info *info, uint64_t start, uint64_t size) diff --git a/lib/cache/lvmcache.h b/lib/cache/lvmcache.h index 16cbb4874..21f29ef90 100644 --- a/lib/cache/lvmcache.h +++ b/lib/cache/lvmcache.h @@ -129,7 +129,8 @@ void lvmcache_del_mdas(struct lvmcache_info *info); void lvmcache_del_das(struct lvmcache_info *info); void lvmcache_del_bas(struct lvmcache_info *info); int lvmcache_add_mda(struct lvmcache_info *info, struct device *dev, - uint64_t start, uint64_t size, unsigned ignored); + uint64_t start, uint64_t size, unsigned ignored, + struct metadata_area **mda_new); int lvmcache_add_da(struct lvmcache_info *info, uint64_t start, uint64_t size); int lvmcache_add_ba(struct lvmcache_info *info, uint64_t start, uint64_t size); diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c index f5e08fa63..df1e56b4e 100644 --- a/lib/format_text/format-text.c +++ b/lib/format_text/format-text.c @@ -1547,7 +1547,7 @@ static int _text_pv_write(const struct format_type *fmt, struct physical_volume // if fmt is not the same as info->fmt we are in trouble if (!lvmcache_add_mda(info, mdac->area.dev, mdac->area.start, mdac->area.size, - mda_is_ignored(mda))) + mda_is_ignored(mda), NULL)) return_0; } diff --git a/lib/format_text/format-text.h b/lib/format_text/format-text.h index 1300e58af..c42d5c0f7 100644 --- a/lib/format_text/format-text.h +++ b/lib/format_text/format-text.h @@ -61,7 +61,8 @@ int add_ba(struct dm_pool *mem, struct dm_list *eas, uint64_t start, uint64_t size); void del_bas(struct dm_list *bas); int add_mda(const struct format_type *fmt, struct dm_pool *mem, struct dm_list *mdas, - struct device *dev, uint64_t start, uint64_t size, unsigned ignored); + struct device *dev, uint64_t start, uint64_t size, unsigned ignored, + struct metadata_area **mda_new); void del_mdas(struct dm_list *mdas); /* On disk */ diff --git a/lib/format_text/text_label.c b/lib/format_text/text_label.c index 6eca3c110..5f4bcfd8e 100644 --- a/lib/format_text/text_label.c +++ b/lib/format_text/text_label.c @@ -243,7 +243,8 @@ void del_bas(struct dm_list *bas) /* FIXME: refactor this function with other mda constructor code */ int add_mda(const struct format_type *fmt, struct dm_pool *mem, struct dm_list *mdas, - struct device *dev, uint64_t start, uint64_t size, unsigned ignored) + struct device *dev, uint64_t start, uint64_t size, unsigned ignored, + struct metadata_area **mda_new) { /* FIXME List size restricted by pv_header SECTOR_SIZE */ struct metadata_area *mdal, *mda; @@ -295,6 +296,8 @@ int add_mda(const struct format_type *fmt, struct dm_pool *mem, struct dm_list * mda_set_ignored(mdal, ignored); dm_list_add(mdas, &mdal->list); + if (mda_new) + *mda_new = mdal; return 1; } @@ -408,7 +411,7 @@ static int _text_read(struct labeller *l, struct device *dev, void *label_buf, /* Metadata area headers */ dlocn_xl++; while ((offset = xlate64(dlocn_xl->offset))) { - lvmcache_add_mda(info, dev, offset, xlate64(dlocn_xl->size), 0); + lvmcache_add_mda(info, dev, offset, xlate64(dlocn_xl->size), 0, NULL); dlocn_xl++; } From 86d831b9165d71769cd0fc05b52bc7469760e2f0 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 5 Feb 2019 13:40:34 -0600 Subject: [PATCH 07/22] change args for text label read function Have the caller pass the label_sector to the read function so the read function can set the sector field in the label struct, instead of having the read function return a pointer to the label for the caller to set the sector field. Also have the read function return a flag indicating to the caller that the scanned device was identified as a duplicate pv. --- lib/cache/lvmcache.c | 14 ++++++++---- lib/cache/lvmcache.h | 6 +++--- lib/format_text/format-text.c | 7 +++--- lib/format_text/text_label.c | 27 +++++++++++++++++------ lib/label/label.c | 40 ++++++++++++++++++++++++++++------- lib/label/label.h | 2 +- 6 files changed, 70 insertions(+), 26 deletions(-) diff --git a/lib/cache/lvmcache.c b/lib/cache/lvmcache.c index 3e89b0288..398717310 100644 --- a/lib/cache/lvmcache.c +++ b/lib/cache/lvmcache.c @@ -1760,7 +1760,7 @@ int lvmcache_update_vg(struct volume_group *vg, unsigned precommitted) * transient duplicate? */ -static struct lvmcache_info * _create_info(struct labeller *labeller, struct device *dev) +static struct lvmcache_info * _create_info(struct labeller *labeller, struct device *dev, uint64_t label_sector) { struct lvmcache_info *info; struct label *label; @@ -1773,6 +1773,9 @@ static struct lvmcache_info * _create_info(struct labeller *labeller, struct dev return NULL; } + label->dev = dev; + label->sector = label_sector; + info->dev = dev; info->fmt = labeller->fmt; @@ -1788,8 +1791,9 @@ static struct lvmcache_info * _create_info(struct labeller *labeller, struct dev } struct lvmcache_info *lvmcache_add(struct labeller *labeller, - const char *pvid, struct device *dev, - const char *vgname, const char *vgid, uint32_t vgstatus) + const char *pvid, struct device *dev, uint64_t label_sector, + const char *vgname, const char *vgid, uint32_t vgstatus, + int *is_duplicate) { char pvid_s[ID_LEN + 1] __attribute__((aligned(8))); char uuid[64] __attribute__((aligned(8))); @@ -1817,7 +1821,7 @@ struct lvmcache_info *lvmcache_add(struct labeller *labeller, info = lvmcache_info_from_pvid(dev->pvid, NULL, 0); if (!info) { - info = _create_info(labeller, dev); + info = _create_info(labeller, dev, label_sector); created = 1; } @@ -1849,6 +1853,8 @@ struct lvmcache_info *lvmcache_add(struct labeller *labeller, dm_list_add(&_found_duplicate_devs, &devl->list); _found_duplicate_pvs = 1; + if (is_duplicate) + *is_duplicate = 1; return NULL; } diff --git a/lib/cache/lvmcache.h b/lib/cache/lvmcache.h index 21f29ef90..d4c19f9f4 100644 --- a/lib/cache/lvmcache.h +++ b/lib/cache/lvmcache.h @@ -74,9 +74,9 @@ int lvmcache_label_rescan_vg(struct cmd_context *cmd, const char *vgname, const /* Add/delete a device */ struct lvmcache_info *lvmcache_add(struct labeller *labeller, const char *pvid, - struct device *dev, - const char *vgname, const char *vgid, - uint32_t vgstatus); + struct device *dev, uint64_t label_sector, + const char *vgname, const char *vgid, + uint32_t vgstatus, int *is_duplicate); int lvmcache_add_orphan_vginfo(const char *vgname, struct format_type *fmt); void lvmcache_del(struct lvmcache_info *info); void lvmcache_del_dev(struct device *dev); diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c index df1e56b4e..9d58c53b3 100644 --- a/lib/format_text/format-text.c +++ b/lib/format_text/format-text.c @@ -1513,13 +1513,12 @@ static int _text_pv_write(const struct format_type *fmt, struct physical_volume /* Add a new cache entry with PV info or update existing one. */ if (!(info = lvmcache_add(fmt->labeller, (const char *) &pv->id, - pv->dev, pv->vg_name, - is_orphan_vg(pv->vg_name) ? pv->vg_name : pv->vg ? (const char *) &pv->vg->id : NULL, 0))) + pv->dev, pv->label_sector, pv->vg_name, + is_orphan_vg(pv->vg_name) ? pv->vg_name : pv->vg ? (const char *) &pv->vg->id : NULL, 0, NULL))) return_0; + /* lvmcache_add() creates info and info->label structs for the dev, get info->label. */ label = lvmcache_get_label(info); - label->sector = pv->label_sector; - label->dev = pv->dev; lvmcache_update_pv(info, pv, fmt); diff --git a/lib/format_text/text_label.c b/lib/format_text/text_label.c index 5f4bcfd8e..42184dec7 100644 --- a/lib/format_text/text_label.c +++ b/lib/format_text/text_label.c @@ -371,8 +371,8 @@ fail: return 0; } -static int _text_read(struct labeller *l, struct device *dev, void *label_buf, - struct label **label) +static int _text_read(struct labeller *labeller, struct device *dev, void *label_buf, + uint64_t label_sector, int *is_duplicate) { struct label_header *lh = (struct label_header *) label_buf; struct pv_header *pvhdr; @@ -382,18 +382,33 @@ static int _text_read(struct labeller *l, struct device *dev, void *label_buf, uint64_t offset; uint32_t ext_version; struct _update_mda_baton baton; + struct label *label; /* * PV header base */ pvhdr = (struct pv_header *) ((char *) label_buf + xlate32(lh->offset_xl)); - if (!(info = lvmcache_add(l, (char *)pvhdr->pv_uuid, dev, + /* + * FIXME: stop adding the device to lvmcache initially as an orphan + * (and then moving it later) and instead just add it when we know the + * VG. + * + * If another device with this same PVID has already been seen, + * lvmcache_add will put this device in the duplicates list in lvmcache + * and return NULL. At the end of label_scan, the duplicate devs are + * compared, and if another dev is preferred for this PV, then the + * existing dev is removed from lvmcache and _text_read is called again + * for this dev, and lvmcache_add will add it. + * + * Other reasons for lvmcache_add to return NULL are internal errors. + */ + if (!(info = lvmcache_add(labeller, (char *)pvhdr->pv_uuid, dev, label_sector, FMT_TEXT_ORPHAN_VG_NAME, - FMT_TEXT_ORPHAN_VG_NAME, 0))) + FMT_TEXT_ORPHAN_VG_NAME, 0, is_duplicate))) return_0; - *label = lvmcache_get_label(info); + label = lvmcache_get_label(info); lvmcache_set_device_size(info, xlate64(pvhdr->device_size_xl)); @@ -441,7 +456,7 @@ static int _text_read(struct labeller *l, struct device *dev, void *label_buf, } out: baton.info = info; - baton.label = *label; + baton.label = label; /* * In the vg_read phase, we compare all mdas and decide which to use diff --git a/lib/label/label.c b/lib/label/label.c index f6ba2d84d..4d008ed14 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -356,9 +356,9 @@ static int _process_block(struct cmd_context *cmd, struct dev_filter *f, int *is_lvm_device) { char label_buf[LABEL_SIZE] __attribute__((aligned(8))); - struct label *label = NULL; struct labeller *labeller; uint64_t sector = 0; + int is_duplicate = 0; int ret = 0; int pass; @@ -423,17 +423,41 @@ static int _process_block(struct cmd_context *cmd, struct dev_filter *f, /* * This is the point where the scanning code dives into the rest of - * lvm. ops->read() is usually _text_read() which reads the pv_header, - * mda locations, mda contents. As these bits of data are read, they - * are saved into lvmcache as info/vginfo structs. + * lvm. ops->read() is _text_read() which reads the pv_header, mda + * locations, and metadata text. All of the info it finds about the PV + * and VG is stashed in lvmcache which saves it in the form of + * info/vginfo structs. That lvmcache info is used later when the + * command wants to read the VG to do something to it. */ + ret = labeller->ops->read(labeller, dev, label_buf, sector, &is_duplicate); - if ((ret = (labeller->ops->read)(labeller, dev, label_buf, &label)) && label) { - label->dev = dev; - label->sector = sector; - } else { + if (!ret) { /* FIXME: handle errors */ lvmcache_del_dev(dev); + + if (is_duplicate) { + /* + * _text_read() called lvmcache_add() which found an + * existing info struct for this PVID but for a + * different dev. lvmcache_add() did not add an info + * struct for this dev, but added this dev to the list + * of duplicate devs. + */ + log_warn("WARNING: scan found duplicate PVID %s on %s", dev->pvid, dev_name(dev)); + } else { + /* + * Leave the info in lvmcache because the device is + * present and can still be used even if it has + * metadata that we can't process (we can get metadata + * from another PV/mda.) _text_read only saves mdas + * with good metadata in lvmcache (this includes old + * metadata), and if a PV has no mdas with good + * metadata, then the info for the PV will be in + * lvmcache with empty info->mdas, and it will behave + * like a PV with no mdas (a common configuration.) + */ + log_warn("WARNING: scan failed to get metadata summary from %s PVID %s", dev_name(dev), dev->pvid); + } } out: return ret; diff --git a/lib/label/label.h b/lib/label/label.h index 8f1f0e2d4..07bb77d88 100644 --- a/lib/label/label.h +++ b/lib/label/label.h @@ -65,7 +65,7 @@ struct label_ops { * Read a label from a volume. */ int (*read) (struct labeller * l, struct device * dev, - void *label_buf, struct label ** label); + void *label_buf, uint64_t label_sector, int *is_duplicate); /* * Populate label_type etc. From 027e0e92e6edcde98fd951286c21a29f22f3ec20 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 5 Feb 2019 14:02:24 -0600 Subject: [PATCH 08/22] fix vg_commit return value The existing comment was desribing the correct behavior, but the code didn't match. The commit is successful if one mda was committed. Making it depend on the result of the internal lvmcache update was wrong. --- lib/metadata/metadata.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/lib/metadata/metadata.c b/lib/metadata/metadata.c index 65da7e138..49c1e748c 100644 --- a/lib/metadata/metadata.c +++ b/lib/metadata/metadata.c @@ -3072,6 +3072,7 @@ static int _vg_commit_mdas(struct volume_group *vg) struct metadata_area *mda, *tmda; struct dm_list ignored; int failed = 0; + int good = 0; int cache_updated = 0; /* Rearrange the metadata_areas_in_use so ignored mdas come first. */ @@ -3092,27 +3093,31 @@ static int _vg_commit_mdas(struct volume_group *vg) !mda->ops->vg_commit(vg->fid, vg, mda)) { stack; failed = 1; - } + } else + good++; + /* Update cache first time we succeed */ if (!failed && !cache_updated) { lvmcache_update_vg(vg, 0); cache_updated = 1; } } - return cache_updated; + if (good) + return 1; + return 0; } /* Commit pending changes */ int vg_commit(struct volume_group *vg) { - int cache_updated = 0; struct pv_list *pvl; + int ret; - cache_updated = _vg_commit_mdas(vg); + ret = _vg_commit_mdas(vg); set_vg_notify(vg->cmd); - if (cache_updated) { + if (ret) { /* * We need to clear old_name after a successful commit. * The volume_group structure could be reused later. @@ -3126,7 +3131,7 @@ int vg_commit(struct volume_group *vg) } /* If at least one mda commit succeeded, it was committed */ - return cache_updated; + return ret; } /* Don't commit any pending changes */ From 45b164f62cae25da3ac4b9f0d4f87ecaee1575fa Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 6 Feb 2019 12:10:13 -0600 Subject: [PATCH 09/22] create separate lvmcache update functions for read and write The vg read and vg write cases need to update lvmcache differently, so create separate functions for them. The read case now handles checking for outdated mdas and moves them aside into a new list to be repaired in a subsequent commit. --- lib/cache/lvmcache.c | 126 +++++++++++++++++++++++++++++++++++++++- lib/cache/lvmcache.h | 3 +- lib/metadata/metadata.c | 6 +- 3 files changed, 130 insertions(+), 5 deletions(-) diff --git a/lib/cache/lvmcache.c b/lib/cache/lvmcache.c index 398717310..08c04590e 100644 --- a/lib/cache/lvmcache.c +++ b/lib/cache/lvmcache.c @@ -1685,7 +1685,27 @@ int lvmcache_update_vgname_and_id(struct lvmcache_info *info, struct lvmcache_vg return 1; } -int lvmcache_update_vg(struct volume_group *vg, unsigned precommitted) +/* + * FIXME: quit trying to mirror changes that a command is making into lvmcache. + * + * First, it's complicated and hard to ensure it's done correctly in every case + * (it would be much easier and safer to just toss out what's in lvmcache and + * reread the info to recreate it from scratch instead of trying to make sure + * every possible discrete state change is correct.) + * + * Second, it's unnecessary if commands just use the vg they are modifying + * rather than also trying to get info from lvmcache. The lvmcache state + * should be populated by label_scan, used to perform vg_read's, and then + * ignored (or dropped so it can't be used). + * + * lvmcache info is already used very little after a command begins its + * operation. The code that's supposed to keep the lvmcache in sync with + * changes being made to disk could be half wrong and we wouldn't know it. + * That creates a landmine for someone who might try to use a bit of it that + * isn't being updated correctly. + */ + +int lvmcache_update_vg_from_write(struct volume_group *vg) { struct pv_list *pvl; struct lvmcache_info *info; @@ -1709,6 +1729,110 @@ int lvmcache_update_vg(struct volume_group *vg, unsigned precommitted) return 1; } +/* + * The lvmcache representation of a VG after label_scan can be incorrect + * because the label_scan does not use the full VG metadata to construct + * vginfo/info. PVs that don't hold VG metadata weren't attached to the vginfo + * during label scan, and PVs with outdated metadata (claiming to be in the VG, + * but not listed in the latest metadata) were attached to the vginfo, but + * shouldn't be. After vg_read() gets the full metdata in the form of a 'vg', + * this function is called to fix up the lvmcache representation of the VG + * using the 'vg'. + */ + +int lvmcache_update_vg_from_read(struct volume_group *vg, unsigned precommitted) +{ + struct pv_list *pvl; + struct lvmcache_vginfo *vginfo; + struct lvmcache_info *info, *info2; + struct metadata_area *mda; + char pvid_s[ID_LEN + 1] __attribute__((aligned(8))); + struct lvmcache_vgsummary vgsummary = { + .vgname = vg->name, + .vgstatus = vg->status, + .vgid = vg->id, + .system_id = vg->system_id, + .lock_type = vg->lock_type + }; + + if (!(vginfo = lvmcache_vginfo_from_vgname(vg->name, (const char *)&vg->id))) { + log_error(INTERNAL_ERROR "lvmcache_update_vg %s no vginfo", vg->name); + return 0; + } + + /* + * The label scan doesn't know when a PV with old metadata has been + * removed from the VG. Now with the vg we can tell, so remove the + * info for a PV that has been removed from the VG with + * vgreduce --removemissing. + */ + dm_list_iterate_items_safe(info, info2, &vginfo->infos) { + int found = 0; + dm_list_iterate_items(pvl, &vg->pvs) { + if (pvl->pv->dev != info->dev) + continue; + found = 1; + break; + } + + if (found) + continue; + + log_warn("WARNING: outdated PV %s seqno %u has been removed in current VG %s seqno %u.", + dev_name(info->dev), info->summary_seqno, vg->name, vginfo->seqno); + + _drop_vginfo(info, vginfo); /* remove from vginfo->infos */ + dm_list_add(&vginfo->outdated_infos, &info->list); + } + + dm_list_iterate_items(pvl, &vg->pvs) { + (void) dm_strncpy(pvid_s, (char *) &pvl->pv->id, sizeof(pvid_s)); + + if (!(info = lvmcache_info_from_pvid(pvid_s, pvl->pv->dev, 0))) { + log_debug_cache("lvmcache_update_vg %s no info for %s %s", + vg->name, + (char *) &pvl->pv->id, + pvl->pv->dev ? dev_name(pvl->pv->dev) : "missing"); + continue; + } + + log_debug_cache("lvmcache_update_vg %s for info %s", + vg->name, dev_name(info->dev)); + + /* + * FIXME: use a different function that just attaches info's that + * had no metadata onto the correct vginfo. + * + * info's for PVs without metadata were not connected to the + * vginfo by label_scan, so do it here. + */ + if (!lvmcache_update_vgname_and_id(info, &vgsummary)) { + log_debug_cache("lvmcache_update_vg %s failed to update info for %s", + vg->name, dev_name(info->dev)); + } + + /* + * Ignored mdas were not copied from info->mdas to + * fid->metadata_areas... when create_text_instance (at the + * start of vg_read) called lvmcache_fid_add_mdas_vg because at + * that point the info's were not connected to the vginfo + * (since label_scan didn't know this without metadata.) + */ + dm_list_iterate_items(mda, &info->mdas) { + if (!mda_is_ignored(mda)) + continue; + log_debug("lvmcache_update_vg %s copy ignored mdas for %s", vg->name, dev_name(info->dev)); + if (!lvmcache_fid_add_mdas_pv(info, vg->fid)) { + log_debug_cache("lvmcache_update_vg %s failed to update mdas for %s", + vg->name, dev_name(info->dev)); + } + break; + } + } + + return 1; +} + /* * We can see multiple different devices with the * same pvid, i.e. duplicates. diff --git a/lib/cache/lvmcache.h b/lib/cache/lvmcache.h index d4c19f9f4..192170939 100644 --- a/lib/cache/lvmcache.h +++ b/lib/cache/lvmcache.h @@ -84,7 +84,8 @@ void lvmcache_del_dev(struct device *dev); /* Update things */ int lvmcache_update_vgname_and_id(struct lvmcache_info *info, struct lvmcache_vgsummary *vgsummary); -int lvmcache_update_vg(struct volume_group *vg, unsigned precommitted); +int lvmcache_update_vg_from_read(struct volume_group *vg, unsigned precommitted); +int lvmcache_update_vg_from_write(struct volume_group *vg); void lvmcache_lock_vgname(const char *vgname, int read_only); void lvmcache_unlock_vgname(const char *vgname); diff --git a/lib/metadata/metadata.c b/lib/metadata/metadata.c index 49c1e748c..bd6ec4d99 100644 --- a/lib/metadata/metadata.c +++ b/lib/metadata/metadata.c @@ -3098,7 +3098,7 @@ static int _vg_commit_mdas(struct volume_group *vg) /* Update cache first time we succeed */ if (!failed && !cache_updated) { - lvmcache_update_vg(vg, 0); + lvmcache_update_vg_from_write(vg); cache_updated = 1; } } @@ -3993,7 +3993,7 @@ static struct volume_group *_vg_read(struct cmd_context *cmd, * If there is no precommitted metadata, committed metadata * is read and stored in the cache even if use_precommitted is set */ - lvmcache_update_vg(correct_vg, correct_vg->status & PRECOMMITTED); + lvmcache_update_vg_from_read(correct_vg, correct_vg->status & PRECOMMITTED); if (!(pvids = lvmcache_get_pvids(cmd, vgname, vgid))) { release_vg(correct_vg); @@ -4149,7 +4149,7 @@ static struct volume_group *_vg_read(struct cmd_context *cmd, * If there is no precommitted metadata, committed metadata * is read and stored in the cache even if use_precommitted is set */ - lvmcache_update_vg(correct_vg, (correct_vg->status & PRECOMMITTED)); + lvmcache_update_vg_from_read(correct_vg, (correct_vg->status & PRECOMMITTED)); if (inconsistent) { /* FIXME Test should be if we're *using* precommitted metadata not if we were searching for it */ From ab61a6d85da4e4f1d540e40ebac77999bf3c9a57 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 6 Feb 2019 12:32:26 -0600 Subject: [PATCH 10/22] move wipe_outdated_pvs to vg_write and implement it based on a device, not based on a pv struct (which is not available when the device is not a part of the vg.) currently only the vgremove command wipes outdated pvs until more advanced recovery is added in a subsequent commit --- lib/commands/toolcontext.h | 1 + lib/format_text/format-text.c | 33 +++++++++ lib/format_text/format-text.h | 3 + lib/metadata/metadata.c | 123 +++++++++++++++++++--------------- tools/vgremove.c | 2 + 5 files changed, 109 insertions(+), 53 deletions(-) diff --git a/lib/commands/toolcontext.h b/lib/commands/toolcontext.h index 7d373abb3..6e4530c8a 100644 --- a/lib/commands/toolcontext.h +++ b/lib/commands/toolcontext.h @@ -179,6 +179,7 @@ struct cmd_context { unsigned use_hints:1; /* if hints are enabled this cmd can use them */ unsigned pvscan_recreate_hints:1; /* enable special case hint handling for pvscan --cache */ unsigned scan_lvs:1; + unsigned wipe_outdated_pvs:1; /* * Devices and filtering. diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c index 9d58c53b3..6e224f006 100644 --- a/lib/format_text/format-text.c +++ b/lib/format_text/format-text.c @@ -2463,3 +2463,36 @@ bad: return NULL; } + +int text_wipe_outdated_pv_mda(struct cmd_context *cmd, struct device *dev, + struct metadata_area *mda) +{ + struct mda_context *mdac = mda->metadata_locn; + uint64_t start_byte = mdac->area.start; + struct mda_header *mdab; + struct raw_locn *rlocn_slot0; + struct raw_locn *rlocn_slot1; + uint32_t bad_fields = 0; + + if (!(mdab = raw_read_mda_header(cmd->fmt, &mdac->area, mda_is_primary(mda), 0, &bad_fields))) { + log_error("Failed to read outdated pv mda header on %s", dev_name(dev)); + return 0; + } + + rlocn_slot0 = &mdab->raw_locns[0]; + rlocn_slot1 = &mdab->raw_locns[1]; + + rlocn_slot0->offset = 0; + rlocn_slot0->size = 0; + rlocn_slot0->checksum = 0; + rlocn_slot1->offset = 0; + rlocn_slot1->size = 0; + rlocn_slot1->checksum = 0; + + if (!_raw_write_mda_header(cmd->fmt, dev, mda_is_primary(mda), start_byte, mdab)) { + log_error("Failed to write outdated pv mda header on %s", dev_name(dev)); + return 0; + } + + return 1; +} diff --git a/lib/format_text/format-text.h b/lib/format_text/format-text.h index c42d5c0f7..2345d52a9 100644 --- a/lib/format_text/format-text.h +++ b/lib/format_text/format-text.h @@ -77,4 +77,7 @@ struct data_area_list { struct disk_locn disk_locn; }; +int text_wipe_outdated_pv_mda(struct cmd_context *cmd, struct device *dev, + struct metadata_area *mda); + #endif diff --git a/lib/metadata/metadata.c b/lib/metadata/metadata.c index bd6ec4d99..a13be57b0 100644 --- a/lib/metadata/metadata.c +++ b/lib/metadata/metadata.c @@ -28,11 +28,14 @@ #include "lib/display/display.h" #include "lib/locking/locking.h" #include "lib/format_text/archiver.h" +#include "lib/format_text/format-text.h" +#include "lib/format_text/layout.h" +#include "lib/format_text/import-export.h" #include "lib/config/defaults.h" #include "lib/locking/lvmlockd.h" -#include "time.h" #include "lib/notify/lvmnotify.h" +#include #include static struct physical_volume *_pv_read(struct cmd_context *cmd, @@ -2922,6 +2925,69 @@ static int _handle_historical_lvs(struct volume_group *vg) return 1; } +static void _wipe_outdated_pvs(struct cmd_context *cmd, struct volume_group *vg) +{ + struct dm_list devs; + struct dm_list *mdas = NULL; + struct device_list *devl; + struct device *dev; + struct metadata_area *mda; + struct label *label; + struct lvmcache_info *info; + uint32_t ext_flags; + + dm_list_init(&devs); + + /* + * When vg_read selected a good copy of the metadata, it used it to + * update the lvmcache representation of the VG (lvmcache_update_vg). + * At that point outdated PVs were recognized and moved into the + * vginfo->outdated_infos list. Here we clear the PVs on that list. + */ + + lvmcache_get_outdated_devs(cmd, vg->name, (const char *)&vg->id, &devs); + + dm_list_iterate_items(devl, &devs) { + dev = devl->dev; + + lvmcache_get_outdated_mdas(cmd, vg->name, (const char *)&vg->id, dev, &mdas); + + if (mdas) { + dm_list_iterate_items(mda, mdas) { + log_warn("WARNING: wiping mda on outdated PV %s", dev_name(dev)); + + if (!text_wipe_outdated_pv_mda(cmd, dev, mda)) + log_warn("WARNING: failed to wipe mda on outdated PV %s", dev_name(dev)); + } + } + + if (!(label = lvmcache_get_dev_label(dev))) { + log_error("_wipe_outdated_pvs no label for %s", dev_name(dev)); + continue; + } + + info = label->info; + ext_flags = lvmcache_ext_flags(info); + ext_flags &= ~PV_EXT_USED; + lvmcache_set_ext_version(info, PV_HEADER_EXTENSION_VSN); + lvmcache_set_ext_flags(info, ext_flags); + + log_warn("WARNING: wiping header on outdated PV %s", dev_name(dev)); + + if (!label_write(dev, label)) + log_warn("WARNING: failed to wipe header on outdated PV %s", dev_name(dev)); + + lvmcache_del(info); + } + + /* + * A vgremove will involve many vg_write() calls (one for each lv + * removed) but we only need to wipe pvs once, so clear the outdated + * list so it won't be wiped again. + */ + lvmcache_del_outdated_devs(cmd, vg->name, (const char *)&vg->id); +} + /* * After vg_write() returns success, * caller MUST call either vg_commit() or vg_revert() @@ -2986,6 +3052,9 @@ int vg_write(struct volume_group *vg) return 0; } + if (vg->cmd->wipe_outdated_pvs) + _wipe_outdated_pvs(vg->cmd, vg); + if (critical_section()) log_error(INTERNAL_ERROR "Writing metadata in critical section."); @@ -3538,52 +3607,6 @@ static int _repair_inconsistent_vg(struct volume_group *vg, uint32_t lockd_state return 1; } -static int _wipe_outdated_pvs(struct cmd_context *cmd, struct volume_group *vg, struct dm_list *to_check, uint32_t lockd_state) -{ - struct pv_list *pvl, *pvl2; - char uuid[64] __attribute__((aligned(8))); - - if (lvmcache_found_duplicate_pvs()) { - log_debug_metadata("Skip wiping outdated PVs with duplicates."); - return 0; - } - - /* - * Cannot write foreign VGs, the owner will repair it. - * Also, if another host is updating its VG, we may read - * the PVs while some are written but not others, making - * some PVs look outdated to us just because we're reading - * the VG while it's only partially written out. - */ - if (_is_foreign_vg(vg)) { - log_debug_metadata("Skip wiping outdated PVs for foreign VG."); - return 0; - } - - if (vg_is_shared(vg) && !(lockd_state & LDST_EX)) { - log_verbose("Skip wiping outdated PVs for shared VG without exclusive lock."); - return 0; - } - - dm_list_iterate_items(pvl, to_check) { - dm_list_iterate_items(pvl2, &vg->pvs) { - if (pvl->pv->dev == pvl2->pv->dev) - goto next_pv; - } - - - if (!id_write_format(&pvl->pv->id, uuid, sizeof(uuid))) - return_0; - log_warn("WARNING: Removing PV %s (%s) that no longer belongs to VG %s", - pv_dev_name(pvl->pv), uuid, vg->name); - if (!pv_write_orphan(cmd, pvl->pv)) - return_0; -next_pv: - ; - } - return 1; -} - static int _check_or_repair_pv_ext(struct cmd_context *cmd, struct volume_group *vg, uint32_t lockd_state, @@ -4217,12 +4240,6 @@ static struct volume_group *_vg_read(struct cmd_context *cmd, release_vg(correct_vg); return NULL; } - - if (!_wipe_outdated_pvs(cmd, correct_vg, &all_pvs, lockd_state)) { - _free_pv_list(&all_pvs); - release_vg(correct_vg); - return_NULL; - } } _free_pv_list(&all_pvs); diff --git a/tools/vgremove.c b/tools/vgremove.c index 1aae87d1a..23640f670 100644 --- a/tools/vgremove.c +++ b/tools/vgremove.c @@ -99,6 +99,8 @@ int vgremove(struct cmd_context *cmd, int argc, char **argv) clear_hint_file(cmd); + cmd->wipe_outdated_pvs = 1; + cmd->handles_missing_pvs = 1; ret = process_each_vg(cmd, argc, argv, NULL, NULL, READ_FOR_UPDATE, 0, From 89914a541f081e1c234a455c17d82269cd90364b Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 6 Feb 2019 13:00:33 -0600 Subject: [PATCH 11/22] process_each_pv handle outdated pvs process_each_pv should account for outdated pvs in the list of all devices it is processing. --- tools/toollib.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/toollib.c b/tools/toollib.c index 8e882e926..78b174553 100644 --- a/tools/toollib.c +++ b/tools/toollib.c @@ -4156,12 +4156,16 @@ static int _process_pvs_in_vg(struct cmd_context *cmd, struct physical_volume *pv; struct pv_list *pvl; struct device_id_list *dil; + struct device_list *devl; + struct dm_list outdated_devs; const char *pv_name; int process_pv; int do_report_ret_code = 1; int ret_max = ECMD_PROCESSED; int ret = 0; + dm_list_init(&outdated_devs); + log_set_report_object_type(LOG_REPORT_OBJECT_TYPE_PV); vg_uuid[0] = '\0'; @@ -4247,6 +4251,12 @@ static int _process_pvs_in_vg(struct cmd_context *cmd, break; log_set_report_object_name_and_id(NULL, NULL); } + + if (!is_orphan_vg(vg->name)) + lvmcache_get_outdated_devs(cmd, vg->name, (const char *)&vg->id, &outdated_devs); + dm_list_iterate_items(devl, &outdated_devs) + _device_list_remove(all_devices, devl->dev); + do_report_ret_code = 0; out: if (do_report_ret_code) From de3d3b11f493a89ebee2eab7efae70d33face954 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 6 Feb 2019 13:18:45 -0600 Subject: [PATCH 12/22] move pv header repairs to vg_write Correct PV header in-use or version fields from vg_write instead of vg_read. --- lib/format_text/format-text.c | 14 ++- lib/metadata/metadata.c | 168 +++++----------------------------- 2 files changed, 34 insertions(+), 148 deletions(-) diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c index 6e224f006..4f0a004df 100644 --- a/lib/format_text/format-text.c +++ b/lib/format_text/format-text.c @@ -1600,12 +1600,16 @@ static int _text_pv_needs_rewrite(const struct format_type *fmt, struct physical { struct lvmcache_info *info; uint32_t ext_vsn; + uint32_t ext_flags; *needs_rewrite = 0; if (!pv->is_labelled) return 1; + if (!pv->dev) + return 1; + if (!(info = lvmcache_info_from_pvid((const char *)&pv->id, pv->dev, 0))) { log_error("Failed to find cached info for PV %s.", pv_dev_name(pv)); return 0; @@ -1613,8 +1617,16 @@ static int _text_pv_needs_rewrite(const struct format_type *fmt, struct physical ext_vsn = lvmcache_ext_version(info); - if (ext_vsn < PV_HEADER_EXTENSION_VSN) + if (ext_vsn < PV_HEADER_EXTENSION_VSN) { + log_debug("PV %s header needs rewrite for new ext version", dev_name(pv->dev)); *needs_rewrite = 1; + } + + ext_flags = lvmcache_ext_flags(info); + if (!(ext_flags & PV_EXT_USED)) { + log_debug("PV %s header needs rewrite to set ext used", dev_name(pv->dev)); + *needs_rewrite = 1; + } return 1; } diff --git a/lib/metadata/metadata.c b/lib/metadata/metadata.c index a13be57b0..804d0cf4a 100644 --- a/lib/metadata/metadata.c +++ b/lib/metadata/metadata.c @@ -2810,57 +2810,6 @@ static int _pv_in_pv_list(struct physical_volume *pv, struct dm_list *head) return 0; } -/* - * Check if any of the PVs in VG still contain old PV headers - * and if yes, schedule them for PV header update. - */ -static int _vg_update_old_pv_ext_if_needed(struct volume_group *vg) -{ - struct pv_list *pvl, *new_pvl; - int pv_needs_rewrite; - - if (!(vg->fid->fmt->features & FMT_PV_FLAGS)) - return 1; - - dm_list_iterate_items(pvl, &vg->pvs) { - if (is_missing_pv(pvl->pv) || - !pvl->pv->fmt->ops->pv_needs_rewrite) - continue; - - if (_pv_in_pv_list(pvl->pv, &vg->pv_write_list)) - continue; - - if (!pvl->pv->fmt->ops->pv_needs_rewrite(pvl->pv->fmt, pvl->pv, - &pv_needs_rewrite)) - return_0; - - if (pv_needs_rewrite) { - /* - * Schedule PV for writing only once! - */ - if (_pv_in_pv_list(pvl->pv, &vg->pv_write_list)) - continue; - - if (!(new_pvl = dm_pool_zalloc(vg->vgmem, sizeof(*new_pvl)))) { - log_error("pv_to_write allocation for '%s' failed", pv_dev_name(pvl->pv)); - return 0; - } - new_pvl->pv = pvl->pv; - dm_list_add(&vg->pv_write_list, &new_pvl->list); - log_debug("PV %s has old extension header, updating to newest version.", - pv_dev_name(pvl->pv)); - } - } - - if (!dm_list_empty(&vg->pv_write_list) && - (!vg_write(vg) || !vg_commit(vg))) { - log_error("Failed to update old PV extension headers in VG %s.", vg->name); - return 0; - } - - return 1; -} - static int _check_historical_lv_is_valid(struct historical_logical_volume *hlv) { struct glv_list *glvl; @@ -2995,7 +2944,7 @@ static void _wipe_outdated_pvs(struct cmd_context *cmd, struct volume_group *vg) int vg_write(struct volume_group *vg) { struct dm_list *mdah; - struct pv_list *pvl, *pvl_safe; + struct pv_list *pvl, *pvl_safe, *new_pvl; struct metadata_area *mda; struct lv_list *lvl; int revert = 0, wrote = 0; @@ -3063,6 +3012,26 @@ int vg_write(struct volume_group *vg) memlock_unlock(vg->cmd); vg->seqno++; + dm_list_iterate_items(pvl, &vg->pvs) { + int update_pv_header = 0; + + if (_pv_in_pv_list(pvl->pv, &vg->pv_write_list)) + continue; + + if (!pvl->pv->fmt->ops->pv_needs_rewrite(pvl->pv->fmt, pvl->pv, &update_pv_header)) + continue; + + if (!update_pv_header) + continue; + + if (!(new_pvl = dm_pool_zalloc(vg->vgmem, sizeof(*new_pvl)))) + continue; + + new_pvl->pv = pvl->pv; + dm_list_add(&vg->pv_write_list, &new_pvl->list); + log_warn("WARNING: updating PV header on %s for VG %s.", pv_dev_name(pvl->pv), vg->name); + } + dm_list_iterate_items_safe(pvl, pvl_safe, &vg->pv_write_list) { if (!pv_write(vg->cmd, pvl->pv, 1)) return_0; @@ -3607,88 +3576,6 @@ static int _repair_inconsistent_vg(struct volume_group *vg, uint32_t lockd_state return 1; } -static int _check_or_repair_pv_ext(struct cmd_context *cmd, - struct volume_group *vg, - uint32_t lockd_state, - int repair, int *inconsistent_pvs) -{ - char uuid[64] __attribute__((aligned(8))); - struct lvmcache_info *info; - uint32_t ext_version, ext_flags; - struct pv_list *pvl; - unsigned pvs_fixed = 0; - int r = 0; - - *inconsistent_pvs = 0; - - dm_list_iterate_items(pvl, &vg->pvs) { - /* Missing PV - nothing to do. */ - if (is_missing_pv(pvl->pv)) - continue; - - if (!pvl->pv->dev) { - /* is_missing_pv doesn't catch NULL dev */ - memset(&uuid, 0, sizeof(uuid)); - if (!id_write_format(&pvl->pv->id, uuid, sizeof(uuid))) - goto_out; - log_warn("WARNING: Not repairing PV %s with missing device.", uuid); - continue; - } - - if (!(info = lvmcache_info_from_pvid(pvl->pv->dev->pvid, pvl->pv->dev, 0))) { - log_error("Failed to find cached info for PV %s.", pv_dev_name(pvl->pv)); - goto out; - } - - ext_version = lvmcache_ext_version(info); - if (ext_version < 2) - continue; - - ext_flags = lvmcache_ext_flags(info); - if (!(ext_flags & PV_EXT_USED)) { - if (!repair) { - *inconsistent_pvs = 1; - /* we're not repairing now, so no need to - * check further PVs - inconsistent_pvs is already - * set and that will trigger the repair next time */ - return 1; - } - - if (_is_foreign_vg(vg)) { - log_verbose("Skip repair of PV %s that is in foreign " - "VG %s but not marked as used.", - pv_dev_name(pvl->pv), vg->name); - *inconsistent_pvs = 1; - } else if (vg_is_shared(vg) && !(lockd_state & LDST_EX)) { - log_warn("Skip repair of PV %s that is in shared " - "VG %s but not marked as used.", - pv_dev_name(pvl->pv), vg->name); - *inconsistent_pvs = 1; - } else { - log_warn("WARNING: Repairing Physical Volume %s that is " - "in Volume Group %s but not marked as used.", - pv_dev_name(pvl->pv), vg->name); - - /* pv write will set correct ext_flags */ - if (!pv_write(cmd, pvl->pv, 1)) { - *inconsistent_pvs = 1; - log_error("Failed to repair physical volume \"%s\".", - pv_dev_name(pvl->pv)); - goto out; - } - pvs_fixed++; - } - } - } - - r = 1; -out: - if ((pvs_fixed > 0) && !_repair_inconsistent_vg(vg, lockd_state)) - return_0; - - return r; -} - /* Caller sets consistent to 1 if it's safe for vg_read_internal to correct * inconsistent metadata on disk (i.e. the VG write lock is held). * This guarantees only consistent metadata is returned. @@ -3724,7 +3611,6 @@ static struct volume_group *_vg_read(struct cmd_context *cmd, int inconsistent_mdas = 0; int inconsistent_mda_count = 0; int strip_historical_lvs = enable_repair; - int update_old_pv_ext = enable_repair; unsigned use_precommitted = precommitted; struct dm_list *pvids; struct pv_list *pvl; @@ -4258,19 +4144,7 @@ static struct volume_group *_vg_read(struct cmd_context *cmd, return NULL; } - /* We have the VG now finally, check if PV ext info is in sync with VG metadata. */ - if (!_check_or_repair_pv_ext(cmd, correct_vg, lockd_state, skipped_rescan ? 0 : enable_repair, - &inconsistent_pvs)) { - release_vg(correct_vg); - return_NULL; - } - if (correct_vg && enable_repair && !skipped_rescan) { - if (update_old_pv_ext && !_vg_update_old_pv_ext_if_needed(correct_vg)) { - release_vg(correct_vg); - return_NULL; - } - if (strip_historical_lvs && !vg_strip_outdated_historical_lvs(correct_vg)) { release_vg(correct_vg); return_NULL; From 47effdc025384cef5b3235a9c4b90e7fd74d68a4 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 6 Feb 2019 13:39:41 -0600 Subject: [PATCH 13/22] vgck --updatemetadata is a new command uses vg_write to correct more common or less severe issues, and also adds the ability to repair some metadata corruption that couldn't be handled previously. --- lib/format_text/text_label.c | 3 ++ lib/metadata/metadata-exported.h | 2 + lib/metadata/metadata.c | 83 ++++++++++++++++++++++++++++++++ lib/metadata/metadata.h | 1 + tools/args.h | 3 ++ tools/command-lines.in | 5 ++ tools/commands.h | 2 +- tools/vgck.c | 54 +++++++++++++++++++++ 8 files changed, 152 insertions(+), 1 deletion(-) diff --git a/lib/format_text/text_label.c b/lib/format_text/text_label.c index 42184dec7..170293409 100644 --- a/lib/format_text/text_label.c +++ b/lib/format_text/text_label.c @@ -341,6 +341,9 @@ static int _read_mda_header_and_metadata(struct metadata_area *mda, void *baton) goto fail; } + if (mda) + mda->header_start = mdah->start; + mda_set_ignored(mda, rlocn_is_ignored(mdah->raw_locns)); if (mda_is_ignored(mda)) { diff --git a/lib/metadata/metadata-exported.h b/lib/metadata/metadata-exported.h index 219c784e9..3af89061d 100644 --- a/lib/metadata/metadata-exported.h +++ b/lib/metadata/metadata-exported.h @@ -1381,4 +1381,6 @@ int lv_on_pmem(struct logical_volume *lv); int vg_is_foreign(struct volume_group *vg); +void vg_write_commit_bad_mdas(struct cmd_context *cmd, struct volume_group *vg); + #endif diff --git a/lib/metadata/metadata.c b/lib/metadata/metadata.c index 804d0cf4a..5eabfa68f 100644 --- a/lib/metadata/metadata.c +++ b/lib/metadata/metadata.c @@ -5597,3 +5597,86 @@ int vg_is_foreign(struct volume_group *vg) return _is_foreign_vg(vg); } +void vg_write_commit_bad_mdas(struct cmd_context *cmd, struct volume_group *vg) +{ + struct dm_list bad_mda_list; + struct mda_list *mdal; + struct metadata_area *mda; + struct device *dev; + + dm_list_init(&bad_mda_list); + + lvmcache_get_bad_mdas(cmd, vg->name, (const char *)&vg->id, &bad_mda_list); + + dm_list_iterate_items(mdal, &bad_mda_list) { + mda = mdal->mda; + dev = mda_get_device(mda); + + /* + * bad_fields: + * + * 0: shouldn't happen + * + * READ|INTERNAL: there's probably nothing wrong on disk + * + * MAGIC|START: there's a good chance that we were + * reading the mda_header from the wrong location; maybe + * the pv_header location was wrong. We don't want to + * write new metadata to the wrong location. To handle + * this we would want to do some further verification that + * we have the mda location correct. + * + * VERSION|CHECKSUM: when the others are correct these + * look safe to repair. + * + * HEADER: general error related to header, covered by fields + * above. + * + * TEXT: general error related to text metadata, we can repair. + */ + if (!mda->bad_fields || + (mda->bad_fields & BAD_MDA_READ) || + (mda->bad_fields & BAD_MDA_INTERNAL) || + (mda->bad_fields & BAD_MDA_MAGIC) || + (mda->bad_fields & BAD_MDA_START)) { + log_warn("WARNING: not repairing bad metadata (0x%x) for mda%d on %s", + mda->bad_fields, mda->mda_num, dev_name(dev)); + continue; + } + + /* + * vg_write/vg_commit reread the mda_header which checks the + * mda header fields and fails if any are bad, which stops + * vg_write/vg_commit from continuing. Suppress these header + * field checks when we know the field is bad and we are going + * to replace it. FIXME: do vg_write/vg_commit really need to + * reread and recheck the mda_header again (probably not)? + */ + + if (mda->bad_fields & BAD_MDA_CHECKSUM) + mda->ignore_bad_fields |= BAD_MDA_CHECKSUM; + if (mda->bad_fields & BAD_MDA_VERSION) + mda->ignore_bad_fields |= BAD_MDA_VERSION; + + log_warn("WARNING: repairing bad metadata (0x%x) in mda%d at %llu on %s.", + mda->bad_fields, mda->mda_num, (unsigned long long)mda->header_start, dev_name(dev)); + + if (!mda->ops->vg_write(vg->fid, vg, mda)) { + log_warn("WARNING: failed to write VG %s metadata to bad mda%d at %llu on %s.", + vg->name, mda->mda_num, (unsigned long long)mda->header_start, dev_name(dev)); + continue; + } + + if (!mda->ops->vg_precommit(vg->fid, vg, mda)) { + log_warn("WARNING: failed to precommit VG %s metadata to bad mda%d at %llu on %s.", + vg->name, mda->mda_num, (unsigned long long)mda->header_start, dev_name(dev)); + continue; + } + + if (!mda->ops->vg_commit(vg->fid, vg, mda)) { + log_warn("WARNING: failed to commit VG %s metadata to bad mda%d at %llu on %s.", + vg->name, mda->mda_num, (unsigned long long)mda->header_start, dev_name(dev)); + continue; + } + } +} diff --git a/lib/metadata/metadata.h b/lib/metadata/metadata.h index 6d158fe8a..63ee4a619 100644 --- a/lib/metadata/metadata.h +++ b/lib/metadata/metadata.h @@ -185,6 +185,7 @@ struct metadata_area { struct metadata_area_ops *ops; void *metadata_locn; uint32_t status; + uint64_t header_start; /* mda_header.start */ int mda_num; uint32_t bad_fields; /* BAD_MDA_ flags are set to indicate errors found when reading */ uint32_t ignore_bad_fields; /* BAD_MDA_ flags are set to indicate errors to ignore */ diff --git a/tools/args.h b/tools/args.h index 69f7e0790..e972c7dd9 100644 --- a/tools/args.h +++ b/tools/args.h @@ -1393,6 +1393,9 @@ arg(thin_ARG, 'T', "thin", 0, 0, 0, "See --type thin, --type thin-pool, and --virtualsize.\n" "See \\fBlvmthin\\fP(7) for more information about LVM thin provisioning.\n") +arg(updatemetadata_ARG, '\0', "updatemetadata", 0, 0, 0, + "Update VG metadata to correct problems.\n") + arg(uuid_ARG, 'u', "uuid", 0, 0, 0, "#pvchange\n" "Generate new random UUID for specified PVs.\n" diff --git a/tools/command-lines.in b/tools/command-lines.in index 03dbf571d..4601239f2 100644 --- a/tools/command-lines.in +++ b/tools/command-lines.in @@ -1624,6 +1624,11 @@ vgck OO: --reportformat ReportFmt OP: VG|Tag ... ID: vgck_general +DESC: Read and display information about a VG. + +vgck --updatemetadata VG +ID: vgck_update_metadata +DESC: Rewrite VG metadata to correct problems. --- diff --git a/tools/commands.h b/tools/commands.h index 612182b45..4006fab21 100644 --- a/tools/commands.h +++ b/tools/commands.h @@ -177,7 +177,7 @@ xx(vgchange, xx(vgck, "Check the consistency of volume group(s)", - ALL_VGS_IS_DEFAULT | LOCKD_VG_SH | ALLOW_HINTS) + ALL_VGS_IS_DEFAULT | LOCKD_VG_SH) xx(vgconvert, "Change volume group metadata format", diff --git a/tools/vgck.c b/tools/vgck.c index a126c2924..90fc5a3aa 100644 --- a/tools/vgck.c +++ b/tools/vgck.c @@ -15,6 +15,57 @@ #include "tools.h" +/* + * TODO: we cannot yet repair corruption in label_header, pv_header/locations, + * or corruption of some mda_header fields. + */ + +static int _update_metadata_single(struct cmd_context *cmd __attribute__((unused)), + const char *vg_name, + struct volume_group *vg, + struct processing_handle *handle __attribute__((unused))) +{ + + /* + * Simply calling vg_write can correct or clean up various things: + * . some mda's have old versions of metdadata + * . wipe outdated PVs + * . fix pv_header used flag and version + * . strip historical lvs + * . clear missing pv flag on unused PV + */ + if (!vg_write(vg)) { + log_error("Failed to write VG."); + return 0; + } + + if (!vg_commit(vg)) { + log_error("Failed to commit VG."); + return 0; + } + + /* + * vg_write does not write to "bad" mdas (where "bad" is corrupt, can't + * be processed when reading). bad mdas are not kept in + * fid->metadata_areas_in_use so vg_read and vg_write ignore them, but + * they are saved in lvmcache. this gets them from lvmcache and tries + * to write this metadata to them. + */ + vg_write_commit_bad_mdas(cmd, vg); + + return 1; +} + +static int _update_metadata(struct cmd_context *cmd, int argc, char **argv) +{ + cmd->handles_missing_pvs = 1; + cmd->wipe_outdated_pvs = 1; + cmd->handles_unknown_segments = 1; + + return process_each_vg(cmd, argc, argv, NULL, NULL, READ_FOR_UPDATE, 0, NULL, + &_update_metadata_single); +} + static int vgck_single(struct cmd_context *cmd __attribute__((unused)), const char *vg_name, struct volume_group *vg, @@ -37,6 +88,9 @@ static int vgck_single(struct cmd_context *cmd __attribute__((unused)), int vgck(struct cmd_context *cmd, int argc, char **argv) { + if (arg_is_set(cmd, updatemetadata_ARG)) + return _update_metadata(cmd, argc, argv); + return process_each_vg(cmd, argc, argv, NULL, NULL, 0, 0, NULL, &vgck_single); } From 5dd32680b0cbe468c07b3187bbb8922323b848f8 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 6 Feb 2019 13:46:35 -0600 Subject: [PATCH 14/22] vgcfgbackup add error messages --- tools/vgcfgbackup.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tools/vgcfgbackup.c b/tools/vgcfgbackup.c index 7d061d517..5a0fb13fa 100644 --- a/tools/vgcfgbackup.c +++ b/tools/vgcfgbackup.c @@ -73,6 +73,15 @@ static int _vg_backup_single(struct cmd_context *cmd, const char *vg_name, return ECMD_FAILED; } + if (vg_missing_pv_count(vg)) { + log_error("No backup taken: specify filename with -f to backup with missing PVs."); + return ECMD_FAILED; + } + if (vg_has_unknown_segments(vg)) { + log_error("No backup taken: specify filename with -f to backup with unknown segments."); + return ECMD_FAILED; + } + /* just use the normal backup code */ backup_enable(cmd, 1); /* force a backup */ if (!backup(vg)) @@ -97,6 +106,14 @@ int vgcfgbackup(struct cmd_context *cmd, int argc, char **argv) handle->custom_handle = &last_filename; + /* + * Just set so that we can do the check ourselves above and + * report a helpful error message in place of the error message + * that would be generated from vg_read. + */ + cmd->handles_missing_pvs = 1; + cmd->handles_unknown_segments = 1; + init_pvmove(1); ret = process_each_vg(cmd, argc, argv, NULL, NULL, READ_ALLOW_INCONSISTENT, 0, From 015b906069a989552c09f61d7230b000d0a1f0f0 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 6 Feb 2019 13:51:54 -0600 Subject: [PATCH 15/22] add a warning message when updating old metadata in an mda that had previously not been updated --- lib/metadata/metadata.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/lib/metadata/metadata.c b/lib/metadata/metadata.c index 5eabfa68f..2d3ff1c28 100644 --- a/lib/metadata/metadata.c +++ b/lib/metadata/metadata.c @@ -2947,6 +2947,7 @@ int vg_write(struct volume_group *vg) struct pv_list *pvl, *pvl_safe, *new_pvl; struct metadata_area *mda; struct lv_list *lvl; + struct device *mda_dev; int revert = 0, wrote = 0; if (vg_is_shared(vg)) { @@ -3040,14 +3041,34 @@ int vg_write(struct volume_group *vg) /* Write to each copy of the metadata area */ dm_list_iterate_items(mda, &vg->fid->metadata_areas_in_use) { + mda_dev = mda_get_device(mda); + if (mda->status & MDA_FAILED) continue; + + /* + * When the scan and vg_read find old metadata in an mda, they + * leave the info struct in lvmcache, and leave the mda in + * info->mdas. That means we use the mda here to write new + * metadata into. This means that a command writing a VG will + * automatically update old metadata to the latest. + * + * This can also happen if the metadata was ignored on this + * dev, and then it's later changed to not ignored, and + * we see the old metadata. + */ + if (lvmcache_has_old_metadata(vg->cmd, vg->name, (const char *)&vg->id, mda_dev)) { + log_warn("WARNING: updating old metadata to %u on %s for VG %s.", + vg->seqno, dev_name(mda_dev), vg->name); + } + if (!mda->ops->vg_write) { log_error("Format does not support writing volume" "group metadata areas"); revert = 1; break; } + if (!mda->ops->vg_write(vg->fid, vg, mda)) { if (vg->cmd->handles_missing_pvs) { log_warn("WARNING: Failed to write an MDA of VG %s.", vg->name); From ba7ff96faff052c6145c71222ea5047a6bcee33b Mon Sep 17 00:00:00 2001 From: David Teigland Date: Fri, 24 May 2019 12:04:37 -0500 Subject: [PATCH 16/22] improve reading and repairing vg metadata The fact that vg repair is implemented as a part of vg read has led to a messy and complicated implementation of vg_read, and limited and uncontrolled repair capability. This splits read and repair apart. Summary ------- - take all kinds of various repairs out of vg_read - vg_read no longer writes anything - vg_read now simply reads and returns vg metadata - vg_read ignores bad or old copies of metadata - vg_read proceeds with a single good copy of metadata - improve error checks and handling when reading - keep track of bad (corrupt) copies of metadata in lvmcache - keep track of old (seqno) copies of metadata in lvmcache - keep track of outdated PVs in lvmcache - vg_write will do basic repairs - new command vgck --updatemetdata will do all repairs Details ------- - In scan, do not delete dev from lvmcache if reading/processing fails; the dev is still present, and removing it makes it look like the dev is not there. Records are now kept about the problems with each PV so they be fixed/repaired in the appropriate places. - In scan, record a bad mda on failure, and delete the mda from mda in use list so it will not be used by vg_read or vg_write, only by repair. - In scan, succeed if any good mda on a device is found, instead of failing if any is bad. The bad/old copies of metadata should not interfere with normal usage while good copies can be used. - In scan, add a record of old mdas in lvmcache for later, do not repair them while reading, and do not let them prevent us from finding and using a good copy of metadata from elsewhere. One result is that "inconsistent metadata" is no longer a read error, but instead a record in lvmcache that can be addressed separate from the read. - Treat a dev with no good mdas like a dev with no mdas, which is an existing case we already handle. - Don't use a fake vg "handle" for returning an error from vg_read, or the vg_read_error function for getting that error number; just return null if the vg cannot be read or used, and an error_flags arg with flags set for the specific kind of error (which can be used later for determining the kind of repair.) - Saving an original copy of the vg metadata, for purposes of reverting a write, is now done explicitly in vg_read instead of being hidden in the vg_make_handle function. - When a vg is not accessible due to "access restrictions" but is otherwise fine, return the vg through the new error_vg arg so that process_each_pv can skip the PVs in the VG while processing. (This is a temporary accomodation for the way process_each_pv tracks which devs have been looked at, and can be dropped later when process_each_pv implementation dev tracking is changed.) - vg_read does not try to fix or recover a vg, but now just reads the metadata, checks access restrictions and returns it. (Checking access restrictions might be better done outside of vg_read, but this is a later improvement.) - _vg_read now simply makes one attempt to read metadata from each mda, and uses the most recent copy to return to the caller in the form of a 'vg' struct. (bad mdas were excluded during the scan and are not retried) (old mdas were not excluded during scan and are retried here) - vg_read uses _vg_read to get the latest copy of metadata from mdas, and then makes various checks against it to produce warnings, and to check if VG access is allowed (access restrictions include: writable, foreign, shared, clustered, missing pvs). - Things that were previously silently/automatically written by vg_read that are now done by vg_write, based on the records made in lvmcache during the scan and read: . clearing the missing flag . updating old copies of metadata . clearing outdated pvs . updating pv header flags - Bad/corrupt metadata are now repaired; they were not before. Test changes ------------ - A read command no longer writes the VG to repair it, so add a write command to do a repair. (inconsistent-metadata, unlost-pv) - When a missing PV is removed from a VG, and then the device is enabled again, vgck --updatemetadata is needed to clear the outdated PV before it can be used again, where it wasn't before. (lvconvert-repair-policy, lvconvert-repair-raid, lvconvert-repair, mirror-vgreduce-removemissing, pv-ext-flags, unlost-pv) Reading bad/old metadata ------------------------ - "bad metadata": the mda_header or metadata text has invalid fields or can't be parsed by lvm. This is a form of corruption that would not be caused by known failure scenarios. A checksum error is typically included among the errors reported. - "old metadata": a valid copy of the metadata that has a smaller seqno than other copies of the metadata. This can happen if the device failed, or io failed, or lvm failed while commiting new metadata to all the metadata areas. Old metadata on a PV that has been removed from the VG is the "outdated" case below. When a VG has some PVs with bad/old metadata, lvm can simply ignore the bad/old copies, and use a good copy. This is why there are multiple copies of the metadata -- so it's available even when some of the copies cannot be used. The bad/old copies do not have to be repaired before the VG can be used (the repair can happen later.) A PV with no good copies of the metadata simply falls back to being treated like a PV with no mdas; a common and harmless configuration. When bad/old metadata exists, lvm warns the user about it, and suggests repairing it using a new metadata repair command. Bad metadata in particular is something that users will want to investigate and repair themselves, since it should not happen and may indicate some other problem that needs to be fixed. PVs with bad/old metadata are not the same as missing devices. Missing devices will block various kinds of VG modification or activation, but bad/old metadata will not. Previously, lvm would attempt to repair bad/old metadata whenever it was read. This was unnecessary since lvm does not require every copy of the metadata to be used. It would also hide potential problems that should be investigated by the user. It was also dangerous in cases where the VG was on shared storage. The user is now allowed to investigate potential problems and decide how and when to repair them. Repairing bad/old metadata -------------------------- When label scan sees bad metadata in an mda, that mda is removed from the lvmcache info->mdas list. This means that vg_read will skip it, and not attempt to read/process it again. If it was the only in-use mda on a PV, that PV is treated like a PV with no mdas. It also means that vg_write will also skip the bad mda, and not attempt to write new metadata to it. The only way to repair bad metadata is with the metadata repair command. When label scan sees old metadata in an mda, that mda is kept in the lvmcache info->mdas list. This means that vg_read will read/process it again, and likely see the same mismatch with the other copies of the metadata. Like the label_scan, the vg_read will simply ignore the old copy of the metadata and use the latest copy. If the command is modifying the vg (e.g. lvcreate), then vg_write, which writes new metadata to every mda on info->mdas, will write the new metadata to the mda that had the old version. If successful, this will resolve the old metadata problem (without needing to run a metadata repair command.) Outdated PVs ------------ An outdated PV is a PV that has an old copy of VG metadata that shows it is a member of the VG, but the latest copy of the VG metadata does not include this PV. This happens if the PV is disconnected, vgreduce --removemissing is run to remove the PV from the VG, then the PV is reconnected. In this case, the outdated PV needs have its outdated metadata removed and the PV used flag needs to be cleared. This repair will be done by the subsequent repair command. It is also done if vgremove is run on the VG. MISSING PVs ----------- When a device is missing, most commands will refuse to modify the VG. This is the simple case. More complicated is when a command is allowed to modify the VG while it is missing a device. When a VG is written while a device is missing for one of it's PVs, the VG metadata is written to disk with the MISSING flag on the PV with the missing device. When the VG is next used, it is treated as if the PV with the MISSING flag still has a missing device, even if that device has reappeared. If all LVs that were using a PV with the MISSING flag are removed or repaired so that the MISSING PV is no longer used, then the next time the VG metadata is written, the MISSING flag will be dropped. Alternative methods of clearing the MISSING flag are: vgreduce --removemissing will remove PVs with missing devices, or PVs with the MISSING flag where the device has reappeared. vgextend --restoremissing will clear the MISSING flag on PVs where the device has reappeared, allowing the VG to be used normally. This must be done with caution since the reappeared device may have old data that is inconsistent with data on other PVs. Bad mda repair -------------- The new command: vgck --updatemetadata VG first uses vg_write to repair old metadata, and other basic issues mentioned above (old metadata, outdated PVs, pv_header flags, MISSING_PV flags). It will also go further and repair bad metadata: . text metadata that has a bad checksum . text metadata that is not parsable . corrupt mda_header checksum and version fields (To keep a clean diff, #if 0 is added around functions that are replaced by new code. These commented functions are removed by the following commit.) --- lib/cache/lvmcache.c | 8 + lib/format_text/format-text.c | 7 +- lib/format_text/import.c | 6 +- lib/format_text/text_label.c | 213 +++++-- lib/label/label.c | 4 - lib/metadata/metadata-exported.h | 30 +- lib/metadata/metadata.c | 628 +++++++++++++++++++- lib/metadata/pv.h | 1 + lib/metadata/vg.c | 6 +- lib/metadata/vg.h | 5 - test/shell/inconsistent-metadata.sh | 55 +- test/shell/lvconvert-repair-cache.sh | 3 + test/shell/lvconvert-repair-policy.sh | 2 + test/shell/lvconvert-repair-raid.sh | 7 + test/shell/lvconvert-repair.sh | 6 + test/shell/lvmcache-exercise.sh | 10 +- test/shell/mirror-vgreduce-removemissing.sh | 8 + test/shell/pv-ext-flags.sh | 34 +- test/shell/unlost-pv.sh | 58 +- test/shell/vgck.sh | 4 +- tools/polldaemon.c | 17 +- tools/toollib.c | 73 ++- tools/vgcfgbackup.c | 8 +- tools/vgextend.c | 19 +- tools/vgsplit.c | 7 +- 25 files changed, 974 insertions(+), 245 deletions(-) diff --git a/lib/cache/lvmcache.c b/lib/cache/lvmcache.c index 08c04590e..9a3a2e389 100644 --- a/lib/cache/lvmcache.c +++ b/lib/cache/lvmcache.c @@ -2122,6 +2122,14 @@ int lvmcache_fid_add_mdas_pv(struct lvmcache_info *info, struct format_instance return lvmcache_fid_add_mdas(info, fid, info->dev->pvid, ID_LEN); } +/* + * This is the linkage where information is passed from + * the label_scan to vg_read. + * + * Called by create_text_instance in vg_read to copy the + * mda's found during label_scan and saved in info->mdas, + * to fid->metadata_areas_in_use which is used by vg_read. + */ int lvmcache_fid_add_mdas_vg(struct lvmcache_vginfo *vginfo, struct format_instance *fid) { struct lvmcache_info *info; diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c index 4f0a004df..13b2c664f 100644 --- a/lib/format_text/format-text.c +++ b/lib/format_text/format-text.c @@ -1357,7 +1357,7 @@ int read_metadata_location_summary(const struct format_type *fmt, * valid vg name. */ if (!validate_name(namebuf)) { - log_error("Metadata location on %s at %llu begins with invalid VG name.", + log_warn("WARNING: Metadata location on %s at %llu begins with invalid VG name.", dev_name(dev_area->dev), (unsigned long long)(dev_area->start + rlocn->offset)); return 0; @@ -1423,7 +1423,7 @@ int read_metadata_location_summary(const struct format_type *fmt, (off_t) (dev_area->start + MDA_HEADER_SIZE), wrap, calc_crc, vgsummary->vgname ? 1 : 0, vgsummary)) { - log_error("Metadata location on %s at %llu has invalid summary for VG.", + log_warn("WARNING: metadata on %s at %llu has invalid summary for VG.", dev_name(dev_area->dev), (unsigned long long)(dev_area->start + rlocn->offset)); return 0; @@ -1431,7 +1431,7 @@ int read_metadata_location_summary(const struct format_type *fmt, /* Ignore this entry if the characters aren't permissible */ if (!validate_name(vgsummary->vgname)) { - log_error("Metadata location on %s at %llu has invalid VG name.", + log_warn("WARNING: metadata on %s at %llu has invalid VG name.", dev_name(dev_area->dev), (unsigned long long)(dev_area->start + rlocn->offset)); return 0; @@ -2508,3 +2508,4 @@ int text_wipe_outdated_pv_mda(struct cmd_context *cmd, struct device *dev, return 1; } + diff --git a/lib/format_text/import.c b/lib/format_text/import.c index 64fbb08b9..743077b69 100644 --- a/lib/format_text/import.c +++ b/lib/format_text/import.c @@ -61,13 +61,13 @@ int text_read_metadata_summary(const struct format_type *fmt, offset2, size2, checksum_fn, vgsummary->mda_checksum, checksum_only, 1)) { - /* FIXME: handle errors */ - log_error("Couldn't read volume group metadata from %s.", dev_name(dev)); + log_warn("WARNING: invalid metadata text from %s at %llu.", + dev_name(dev), (unsigned long long)offset); goto out; } } else { if (!config_file_read(cft)) { - log_error("Couldn't read volume group metadata from file."); + log_warn("WARNING: invalid metadata text from file."); goto out; } } diff --git a/lib/format_text/text_label.c b/lib/format_text/text_label.c index 170293409..934bc7368 100644 --- a/lib/format_text/text_label.c +++ b/lib/format_text/text_label.c @@ -241,12 +241,10 @@ void del_bas(struct dm_list *bas) del_das(bas); } -/* FIXME: refactor this function with other mda constructor code */ int add_mda(const struct format_type *fmt, struct dm_pool *mem, struct dm_list *mdas, struct device *dev, uint64_t start, uint64_t size, unsigned ignored, struct metadata_area **mda_new) { -/* FIXME List size restricted by pv_header SECTOR_SIZE */ struct metadata_area *mdal, *mda; struct mda_lists *mda_lists = (struct mda_lists *) fmt->private; struct mda_context *mdac, *mdac2; @@ -322,23 +320,22 @@ static int _text_initialise_label(struct labeller *l __attribute__((unused)), return 1; } -struct _update_mda_baton { - struct lvmcache_info *info; - struct label *label; -}; - -static int _read_mda_header_and_metadata(struct metadata_area *mda, void *baton) +static int _read_mda_header_and_metadata(const struct format_type *fmt, + struct metadata_area *mda, + struct lvmcache_vgsummary *vgsummary, + uint32_t *bad_fields) { - struct _update_mda_baton *p = baton; - const struct format_type *fmt = p->label->labeller->fmt; struct mda_context *mdac = (struct mda_context *) mda->metadata_locn; struct mda_header *mdah; - struct lvmcache_vgsummary vgsummary = { 0 }; - uint32_t bad_fields = 0; - if (!(mdah = raw_read_mda_header(fmt, &mdac->area, mda_is_primary(mda), 0, &bad_fields))) { - log_error("Failed to read mda header from %s", dev_name(mdac->area.dev)); - goto fail; + if (!(mdah = raw_read_mda_header(fmt, &mdac->area, (mda->mda_num == 1), 0, bad_fields))) { + log_warn("WARNING: bad metadata header on %s at %llu.", + dev_name(mdac->area.dev), + (unsigned long long)mdac->area.start); + if (mda) + mda->header_start = mdac->area.start; + *bad_fields |= BAD_MDA_HEADER; + return 0; } if (mda) @@ -350,42 +347,51 @@ static int _read_mda_header_and_metadata(struct metadata_area *mda, void *baton) log_debug_metadata("Ignoring mda on device %s at offset " FMTu64, dev_name(mdac->area.dev), mdac->area.start); + vgsummary->mda_ignored = 1; return 1; } if (!read_metadata_location_summary(fmt, mdah, mda_is_primary(mda), &mdac->area, - &vgsummary, &mdac->free_sectors)) { - if (vgsummary.zero_offset) + vgsummary, &mdac->free_sectors)) { + if (vgsummary->zero_offset) return 1; - log_error("Failed to read metadata summary from %s", dev_name(mdac->area.dev)); - goto fail; - } - - if (!lvmcache_update_vgname_and_id(p->info, &vgsummary)) { - log_error("Failed to save lvm summary for %s", dev_name(mdac->area.dev)); - goto fail; + log_warn("WARNING: bad metadata text on %s in mda%d", + dev_name(mdac->area.dev), mda->mda_num); + *bad_fields |= BAD_MDA_TEXT; + return 0; } return 1; - -fail: - lvmcache_del(p->info); - return 0; } +/* + * Used by label_scan to get a summary of the VG that exists on this PV. This + * summary is stored in lvmcache vginfo/info/info->mdas and is used later by + * vg_read which needs to know which PVs to read for a given VG name, and where + * the metadata is at for those PVs. + */ + static int _text_read(struct labeller *labeller, struct device *dev, void *label_buf, uint64_t label_sector, int *is_duplicate) { + struct lvmcache_vgsummary vgsummary; + struct lvmcache_info *info; + const struct format_type *fmt = labeller->fmt; struct label_header *lh = (struct label_header *) label_buf; struct pv_header *pvhdr; struct pv_header_extension *pvhdr_ext; - struct lvmcache_info *info; + struct metadata_area *mda; + struct metadata_area *mda1 = NULL; + struct metadata_area *mda2 = NULL; struct disk_locn *dlocn_xl; uint64_t offset; uint32_t ext_version; - struct _update_mda_baton baton; - struct label *label; + uint32_t bad_fields; + int mda_count = 0; + int good_mda_count = 0; + int bad_mda_count = 0; + int rv1, rv2; /* * PV header base @@ -411,8 +417,6 @@ static int _text_read(struct labeller *labeller, struct device *dev, void *label FMT_TEXT_ORPHAN_VG_NAME, 0, is_duplicate))) return_0; - label = lvmcache_get_label(info); - lvmcache_set_device_size(info, xlate64(pvhdr->device_size_xl)); lvmcache_del_das(info); @@ -426,11 +430,27 @@ static int _text_read(struct labeller *labeller, struct device *dev, void *label dlocn_xl++; } - /* Metadata area headers */ dlocn_xl++; + + /* Metadata areas */ while ((offset = xlate64(dlocn_xl->offset))) { - lvmcache_add_mda(info, dev, offset, xlate64(dlocn_xl->size), 0, NULL); + + /* + * This just calls add_mda() above, replacing info with info->mdas. + */ + lvmcache_add_mda(info, dev, offset, xlate64(dlocn_xl->size), 0, &mda); + dlocn_xl++; + mda_count++; + + if (mda_count == 1) { + mda1 = mda; + mda1->mda_num = 1; + } + else if (mda_count == 2) { + mda2 = mda; + mda2->mda_num = 2; + } } dlocn_xl++; @@ -440,7 +460,7 @@ static int _text_read(struct labeller *labeller, struct device *dev, void *label */ pvhdr_ext = (struct pv_header_extension *) ((char *) dlocn_xl); if (!(ext_version = xlate32(pvhdr_ext->version))) - goto out; + goto scan_mdas; log_debug_metadata("%s: PV header extension version " FMTu32 " found", dev_name(dev), ext_version); @@ -457,22 +477,117 @@ static int _text_read(struct labeller *labeller, struct device *dev, void *label lvmcache_add_ba(info, offset, xlate64(dlocn_xl->size)); dlocn_xl++; } -out: - baton.info = info; - baton.label = label; - /* - * In the vg_read phase, we compare all mdas and decide which to use - * which are bad and need repair. - * - * FIXME: this quits if the first mda is bad, but we need something - * smarter to be able to use the second mda if it's good. - */ - if (!lvmcache_foreach_mda(info, _read_mda_header_and_metadata, &baton)) { - log_error("Failed to scan VG from %s", dev_name(dev)); - return 0; + scan_mdas: + if (!mda_count) { + log_debug_metadata("Scanning %s found no mdas.", dev_name(dev)); + return 1; } + /* + * Track which devs have bad metadata so repair can find them (even if + * this dev also has good metadata that we are able to use). + * + * When bad metadata is seen, the unusable mda struct is removed from + * lvmcache info->mdas. This means that vg_read and vg_write will skip + * the bad mda not try to read or write the bad metadata. The bad mdas + * are saved in a separate bad_mdas list in lvmcache so that repair can + * find them to repair. + */ + + if (mda1) { + log_debug_metadata("Scanning %s mda1 summary.", dev_name(dev)); + memset(&vgsummary, 0, sizeof(vgsummary)); + bad_fields = 0; + vgsummary.mda_num = 1; + + rv1 = _read_mda_header_and_metadata(fmt, mda1, &vgsummary, &bad_fields); + + if (rv1 && !vgsummary.zero_offset && !vgsummary.mda_ignored) { + if (!lvmcache_update_vgname_and_id(info, &vgsummary)) { + /* I believe this is only an internal error. */ + log_warn("WARNING: Scanning %s mda1 failed to save internal summary.", dev_name(dev)); + + dm_list_del(&mda1->list); + bad_fields |= BAD_MDA_INTERNAL; + mda1->bad_fields = bad_fields; + lvmcache_save_bad_mda(info, mda1); + mda1 = NULL; + bad_mda_count++; + } else { + /* The normal success path */ + log_debug("Scanned %s mda1 seqno %u", dev_name(dev), vgsummary.seqno); + good_mda_count++; + } + } + + if (!rv1) { + /* + * Remove the bad mda from normal mda list so it's not + * used by vg_read/vg_write, but keep track of it in + * lvmcache for repair. + */ + log_warn("WARNING: scanning %s mda1 failed to read metadata summary.", dev_name(dev)); + log_warn("WARNING: repair VG metadata on %s with vgck --updatemetadata.", dev_name(dev)); + + dm_list_del(&mda1->list); + mda1->bad_fields = bad_fields; + lvmcache_save_bad_mda(info, mda1); + mda1 = NULL; + bad_mda_count++; + } + } + + if (mda2) { + log_debug_metadata("Scanning %s mda2 summary.", dev_name(dev)); + memset(&vgsummary, 0, sizeof(vgsummary)); + bad_fields = 0; + vgsummary.mda_num = 2; + + rv2 = _read_mda_header_and_metadata(fmt, mda2, &vgsummary, &bad_fields); + + if (rv2 && !vgsummary.zero_offset && !vgsummary.mda_ignored) { + if (!lvmcache_update_vgname_and_id(info, &vgsummary)) { + /* I believe this is only an internal error. */ + log_warn("WARNING: Scanning %s mda2 failed to save internal summary.", dev_name(dev)); + + dm_list_del(&mda2->list); + bad_fields |= BAD_MDA_INTERNAL; + mda2->bad_fields = bad_fields; + lvmcache_save_bad_mda(info, mda2); + mda2 = NULL; + bad_mda_count++; + } else { + /* The normal success path */ + log_debug("Scanned %s mda2 seqno %u", dev_name(dev), vgsummary.seqno); + good_mda_count++; + } + } + + if (!rv2) { + /* + * Remove the bad mda from normal mda list so it's not + * used by vg_read/vg_write, but keep track of it in + * lvmcache for repair. + */ + log_warn("WARNING: scanning %s mda2 failed to read metadata summary.", dev_name(dev)); + log_warn("WARNING: repair VG metadata on %s with vgck --updatemetadata.", dev_name(dev)); + + dm_list_del(&mda2->list); + mda2->bad_fields = bad_fields; + lvmcache_save_bad_mda(info, mda2); + mda2 = NULL; + bad_mda_count++; + } + } + + if (good_mda_count) + return 1; + + if (bad_mda_count) + return 0; + + /* no metadata in the mdas */ return 1; } diff --git a/lib/label/label.c b/lib/label/label.c index 4d008ed14..a8d87ec16 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -432,9 +432,6 @@ static int _process_block(struct cmd_context *cmd, struct dev_filter *f, ret = labeller->ops->read(labeller, dev, label_buf, sector, &is_duplicate); if (!ret) { - /* FIXME: handle errors */ - lvmcache_del_dev(dev); - if (is_duplicate) { /* * _text_read() called lvmcache_add() which found an @@ -720,7 +717,6 @@ static int _scan_list(struct cmd_context *cmd, struct dev_filter *f, scan_failed = 1; scan_process_errors++; scan_failed_count++; - lvmcache_del_dev(devl->dev); } } diff --git a/lib/metadata/metadata-exported.h b/lib/metadata/metadata-exported.h index 3af89061d..10593a08b 100644 --- a/lib/metadata/metadata-exported.h +++ b/lib/metadata/metadata-exported.h @@ -181,15 +181,14 @@ #define MIRROR_SKIP_INIT_SYNC 0x00000010U /* skip initial sync */ /* vg_read and vg_read_for_update flags */ -#define READ_ALLOW_INCONSISTENT 0x00010000U #define READ_ALLOW_EXPORTED 0x00020000U #define READ_OK_NOTFOUND 0x00040000U #define READ_WARN_INCONSISTENT 0x00080000U #define READ_FOR_UPDATE 0x00100000U /* A meta-flag, useful with toollib for_each_* functions. */ #define PROCESS_SKIP_SCAN 0x00200000U /* skip lvmcache_label_scan in process_each_pv */ -/* vg's "read_status" field */ -#define FAILED_INCONSISTENT 0x00000001U +/* vg_read returns these in error_flags */ +#define FAILED_NOT_ENABLED 0x00000001U #define FAILED_LOCKING 0x00000002U #define FAILED_NOTFOUND 0x00000004U #define FAILED_READ_ONLY 0x00000008U @@ -202,6 +201,7 @@ #define FAILED_SYSTEMID 0x00000400U #define FAILED_LOCK_TYPE 0x00000800U #define FAILED_LOCK_MODE 0x00001000U +#define FAILED_INTERNAL_ERROR 0x00002000U #define SUCCESS 0x00000000U #define VGMETADATACOPIES_ALL UINT32_MAX @@ -717,24 +717,14 @@ int lv_resize(struct logical_volume *lv, struct lvresize_params *lp, struct dm_list *pvh); -/* - * Return a handle to VG metadata. - */ -struct volume_group *vg_read_internal(struct cmd_context *cmd, - const char *vgname, const char *vgid, - uint32_t lockd_state, uint32_t warn_flags, - int enable_repair, - int *mdas_consistent); -struct volume_group *vg_read(struct cmd_context *cmd, const char *vg_name, - const char *vgid, uint32_t read_flags, uint32_t lockd_state); +struct volume_group *vg_read(struct cmd_context *cmd, const char *vg_name, const char *vgid, + uint32_t read_flags, uint32_t lockd_state, + uint32_t *error_flags, struct volume_group **error_vg); struct volume_group *vg_read_for_update(struct cmd_context *cmd, const char *vg_name, const char *vgid, uint32_t read_flags, uint32_t lockd_state); -struct volume_group *vg_read_orphans(struct cmd_context *cmd, - uint32_t warn_flags, - const char *orphan_vgname); -/* - * Test validity of a VG handle. - */ +struct volume_group *vg_read_orphans(struct cmd_context *cmd, const char *orphan_vgname); + +/* this is historical and being removed, don't use */ uint32_t vg_read_error(struct volume_group *vg_handle); /* pe_start and pe_end relate to any existing data so that new metadata @@ -757,7 +747,7 @@ uint32_t pv_list_extents_free(const struct dm_list *pvh); int validate_new_vg_name(struct cmd_context *cmd, const char *vg_name); int vg_validate(struct volume_group *vg); struct volume_group *vg_create(struct cmd_context *cmd, const char *vg_name); -struct volume_group *vg_lock_and_create(struct cmd_context *cmd, const char *vg_name); +struct volume_group *vg_lock_and_create(struct cmd_context *cmd, const char *vg_name, int *exists); int vg_remove_mdas(struct volume_group *vg); int vg_remove_check(struct volume_group *vg); void vg_remove_pvs(struct volume_group *vg); diff --git a/lib/metadata/metadata.c b/lib/metadata/metadata.c index 2d3ff1c28..90409b359 100644 --- a/lib/metadata/metadata.c +++ b/lib/metadata/metadata.c @@ -43,6 +43,46 @@ static struct physical_volume *_pv_read(struct cmd_context *cmd, struct volume_group *vg, struct lvmcache_info *info); +static int _check_pv_ext(struct cmd_context *cmd, struct volume_group *vg) +{ + struct lvmcache_info *info; + uint32_t ext_version, ext_flags; + struct pv_list *pvl; + + if (vg_is_foreign(vg)) + return 1; + + if (vg_is_shared(vg)) + return 1; + + dm_list_iterate_items(pvl, &vg->pvs) { + if (is_missing_pv(pvl->pv)) + continue; + + /* is_missing_pv doesn't catch NULL dev */ + if (!pvl->pv->dev) + continue; + + if (!(info = lvmcache_info_from_pvid(pvl->pv->dev->pvid, pvl->pv->dev, 0))) + continue; + + ext_version = lvmcache_ext_version(info); + if (ext_version < PV_HEADER_EXTENSION_VSN) { + log_warn("WARNING: PV %s in VG %s is using an old PV header, modify the VG to update.", + dev_name(pvl->pv->dev), vg->name); + continue; + } + + ext_flags = lvmcache_ext_flags(info); + if (!(ext_flags & PV_EXT_USED)) { + log_warn("WARNING: PV %s in VG %s is missing the used flag in PV header.", + dev_name(pvl->pv->dev), vg->name); + } + } + + return 1; +} + /* * Historically, DEFAULT_PVMETADATASIZE was 255 for many years, * but that value was only used if default_data_alignment was @@ -373,6 +413,7 @@ int add_pv_to_vg(struct volume_group *vg, const char *pv_name, return 1; } +#if 0 static int _copy_pv(struct dm_pool *pvmem, struct physical_volume *pv_to, struct physical_volume *pv_from) @@ -414,6 +455,7 @@ bad: dm_pool_free(pvmem, pvl_to); return NULL; } +#endif static int _move_pv(struct volume_group *vg_from, struct volume_group *vg_to, const char *pv_name, int enforce_pv_from_source) @@ -587,7 +629,7 @@ int vg_remove_check(struct volume_group *vg) { unsigned lv_count; - if (vg_read_error(vg) || vg_missing_pv_count(vg)) { + if (vg_missing_pv_count(vg)) { log_error("Volume group \"%s\" not found, is inconsistent " "or has PVs missing.", vg ? vg->name : ""); log_error("Consider vgreduce --removemissing if metadata " @@ -966,6 +1008,7 @@ static int _vg_update_embedded_copy(struct volume_group *vg, struct volume_group return 1; } +#if 0 /* * Create a (struct volume_group) volume group handle from a struct volume_group pointer and a * possible failure code or zero for success. @@ -995,6 +1038,7 @@ static struct volume_group *_vg_make_handle(struct cmd_context *cmd, return vg; } +#endif int lv_has_unknown_segments(const struct logical_volume *lv) { @@ -1017,24 +1061,24 @@ int vg_has_unknown_segments(const struct volume_group *vg) return 0; } -struct volume_group *vg_lock_and_create(struct cmd_context *cmd, const char *vg_name) +struct volume_group *vg_lock_and_create(struct cmd_context *cmd, const char *vg_name, int *exists) { uint32_t rc; struct volume_group *vg; if (!validate_name(vg_name)) { log_error("Invalid vg name %s", vg_name); - /* FIXME: use _vg_make_handle() w/proper error code */ return NULL; } rc = vg_lock_newname(cmd, vg_name); + if (rc == FAILED_EXIST) + *exists = 1; if (rc != SUCCESS) - /* NOTE: let caller decide - this may be check for existence */ - return _vg_make_handle(cmd, NULL, rc); + return NULL; vg = vg_create(cmd, vg_name); - if (!vg || vg_read_error(vg)) + if (!vg) unlock_vg(cmd, NULL, vg_name); return vg; @@ -1042,11 +1086,6 @@ struct volume_group *vg_lock_and_create(struct cmd_context *cmd, const char *vg_ /* * Create a VG with default parameters. - * Returns: - * - struct volume_group* with SUCCESS code: VG structure created - * - NULL or struct volume_group* with FAILED_* code: error creating VG structure - * Use vg_read_error() to determine success or failure. - * FIXME: cleanup usage of _vg_make_handle() */ struct volume_group *vg_create(struct cmd_context *cmd, const char *vg_name) { @@ -1087,11 +1126,10 @@ struct volume_group *vg_create(struct cmd_context *cmd, const char *vg_name) vg_name); goto bad; } - return _vg_make_handle(cmd, vg, SUCCESS); + return vg; bad: unlock_and_release_vg(cmd, vg, vg_name); - /* FIXME: use _vg_make_handle() w/proper error code */ return NULL; } @@ -3216,6 +3254,7 @@ void vg_revert(struct volume_group *vg) } } +#if 0 static int _check_mda_in_use(struct metadata_area *mda, void *_in_use) { int *in_use = _in_use; @@ -3223,6 +3262,7 @@ static int _check_mda_in_use(struct metadata_area *mda, void *_in_use) *in_use = 1; return 1; } +#endif struct _vg_read_orphan_baton { struct cmd_context *cmd; @@ -3395,9 +3435,7 @@ static int _vg_read_orphan_pv(struct lvmcache_info *info, void *baton) } /* Make orphan PVs look like a VG. */ -struct volume_group *vg_read_orphans(struct cmd_context *cmd, - uint32_t warn_flags, - const char *orphan_vgname) +struct volume_group *vg_read_orphans(struct cmd_context *cmd, const char *orphan_vgname) { const struct format_type *fmt; struct lvmcache_vginfo *vginfo; @@ -3458,6 +3496,7 @@ struct volume_group *vg_read_orphans(struct cmd_context *cmd, return vg; } +#if 0 static int _update_pv_list(struct dm_pool *pvmem, struct dm_list *all_pvs, struct volume_group *vg) { struct pv_list *pvl, *pvl2; @@ -3491,6 +3530,7 @@ static void _free_pv_list(struct dm_list *all_pvs) dm_list_iterate_items(pvl, all_pvs) pvl->pv->fid->fmt->ops->destroy_instance(pvl->pv->fid); } +#endif static void _destroy_fid(struct format_instance **fid) { @@ -3511,6 +3551,7 @@ int vg_missing_pv_count(const struct volume_group *vg) return ret; } +#if 0 static int _check_reappeared_pv(struct volume_group *correct_vg, struct physical_volume *pv, int act) { @@ -4177,6 +4218,7 @@ static struct volume_group *_vg_read(struct cmd_context *cmd, return correct_vg; } +#endif #define DEV_LIST_DELIM ", " @@ -4266,9 +4308,6 @@ static int _check_devs_used_correspond_with_vg(struct volume_group *vg) struct device_list *dl; int found_inconsistent = 0; - if (is_orphan_vg(vg->name)) - return 1; - strncpy(vgid, (const char *) vg->id.uuid, sizeof(vgid)); vgid[ID_LEN] = '\0'; @@ -4316,6 +4355,7 @@ static int _check_devs_used_correspond_with_vg(struct volume_group *vg) return 1; } +#if 0 struct volume_group *vg_read_internal(struct cmd_context *cmd, const char *vgname, const char *vgid, uint32_t lockd_state, uint32_t warn_flags, @@ -4375,6 +4415,7 @@ out: return vg; } +#endif void free_pv_fid(struct physical_volume *pv) { @@ -4438,7 +4479,7 @@ static void _set_pv_device(struct format_instance *fid, buffer[0] = '\0'; if (fid->fmt->cmd && !fid->fmt->cmd->pvscan_cache_single) - log_error_once("Couldn't find device with uuid %s.", buffer); + log_warn("WARNING: Couldn't find device with uuid %s.", buffer); else log_debug_metadata("Couldn't find device with uuid %s.", buffer); } @@ -4686,6 +4727,7 @@ int vg_check_status(const struct volume_group *vg, uint64_t status) return !vg_bad_status_bits(vg, status); } +#if 0 /* * VG is left unlocked on failure */ @@ -4727,6 +4769,7 @@ static struct volume_group *_recover_vg(struct cmd_context *cmd, return (struct volume_group *)vg; } +#endif static int _allow_extra_system_id(struct cmd_context *cmd, const char *system_id) { @@ -4757,9 +4800,6 @@ static int _allow_extra_system_id(struct cmd_context *cmd, const char *system_id static int _access_vg_lock_type(struct cmd_context *cmd, struct volume_group *vg, uint32_t lockd_state, uint32_t *failure) { - if (!is_real_vg(vg->name)) - return 1; - if (cmd->lockd_vg_disable) return 1; @@ -4905,6 +4945,7 @@ static int _access_vg_systemid(struct cmd_context *cmd, struct volume_group *vg) return 0; } +#if 0 /* * FIXME: move vg_bad_status_bits() checks in here. */ @@ -5108,6 +5149,7 @@ struct volume_group *vg_read_for_update(struct cmd_context *cmd, const char *vg_ return vg; } +#endif /* * Test the validity of a VG handle returned by vg_read() or vg_read_for_update(). @@ -5117,7 +5159,7 @@ uint32_t vg_read_error(struct volume_group *vg_handle) if (!vg_handle) return FAILED_ALLOCATION; - return vg_handle->read_status; + return SUCCESS; } /* @@ -5615,7 +5657,7 @@ int lv_on_pmem(struct logical_volume *lv) int vg_is_foreign(struct volume_group *vg) { - return _is_foreign_vg(vg); + return vg->cmd->system_id && strcmp(vg->system_id, vg->cmd->system_id); } void vg_write_commit_bad_mdas(struct cmd_context *cmd, struct volume_group *vg) @@ -5701,3 +5743,539 @@ void vg_write_commit_bad_mdas(struct cmd_context *cmd, struct volume_group *vg) } } } + +static struct volume_group *_vg_read(struct cmd_context *cmd, + const char *vgname, + const char *vgid, + unsigned precommitted) +{ + const struct format_type *fmt = cmd->fmt; + struct format_instance *fid = NULL; + struct format_instance_ctx fic; + struct volume_group *vg, *vg_ret = NULL; + struct metadata_area *mda, *mda2; + unsigned use_precommitted = precommitted; + struct device *mda_dev, *dev_ret; + struct cached_vg_fmtdata *vg_fmtdata = NULL; /* Additional format-specific data about the vg */ + int found_old_metadata = 0; + unsigned use_previous_vg; + + log_debug_metadata("Reading VG %s %s", vgname ?: "", vgid ?: ""); + + /* + * Rescan the devices that are associated with this vg in lvmcache. + * This repeats what was done by the command's initial label scan, + * but only the devices associated with this VG. + * + * The lvmcache info about these devs is from the initial label scan + * performed by the command before the vg lock was held. Now the VG + * lock is held, so we rescan all the info from the devs in case + * something changed between the initial scan and now that the lock + * is held. + * + * Some commands (e.g. reporting) are fine reporting data read by + * the label scan. It doesn't matter if the devs changed between + * the label scan and here, we can report what was seen in the + * scan, even though it is the old state, since we will not be + * making any modifications. If the VG was being modified during + * the scan, and caused us to see inconsistent metadata on the + * different PVs in the VG, then we do want to rescan the devs + * here to get a consistent view of the VG. Note that we don't + * know if the scan found all the PVs in the VG at this point. + * We don't know that until vg_read looks at the list of PVs in + * the metadata and compares that to the devices found by the scan. + * + * It's possible that a change made to the VG during scan was + * adding or removing a PV from the VG. In this case, the list + * of devices associated with the VG in lvmcache would change + * due to the rescan. + * + * The devs in the VG may be persistently inconsistent due to some + * previous problem. In this case, rescanning the labels here will + * find the same inconsistency. The VG repair (mistakenly done by + * vg_read below) is supposed to fix that. + * + * FIXME: sort out the usage of the global lock (which is mixed up + * with the orphan lock), and when we can tell that the global + * lock is taken prior to the label scan, and still held here, + * we can also skip the rescan in that case. + */ + if (!cmd->can_use_one_scan || lvmcache_scan_mismatch(cmd, vgname, vgid)) { + log_debug_metadata("Rescanning devices for %s", vgname); + lvmcache_label_rescan_vg(cmd, vgname, vgid); + } else { + log_debug_metadata("Skipped rescanning devices for %s", vgname); + } + + /* Now determine the correct vgname if none was supplied */ + if (!vgname && !(vgname = lvmcache_vgname_from_vgid(cmd->mem, vgid))) { + log_debug_metadata("Cache did not find VG name from vgid %s", vgid); + return NULL; + } + + /* Determine the correct vgid if none was supplied */ + if (!vgid && !(vgid = lvmcache_vgid_from_vgname(cmd, vgname))) { + log_debug_metadata("Cache did not find VG vgid from name %s", vgname); + return NULL; + } + + /* + * A "format instance" is an abstraction for a VG location, + * i.e. where a VG's metadata exists on disk. + * + * An fic (format_instance_ctx) is a temporary struct used + * to create an fid (format_instance). The fid hangs around + * and is used to create a 'vg' to which it connected (vg->fid). + * + * The 'fic' describes a VG in terms of fmt/name/id. + * + * The 'fid' describes a VG in more detail than the fic, + * holding information about where to find the VG metadata. + * + * The 'vg' describes the VG in the most detail representing + * all the VG metadata. + * + * The fic and fid are set up by create_instance() to describe + * the VG location. This happens before the VG metadata is + * assembled into the more familiar struct volume_group "vg". + * + * The fid has one main purpose: to keep track of the metadata + * locations for a given VG. It does this by putting 'mda' + * structs on fid->metadata_areas_in_use, which specify where + * metadata is located on disk. It gets this information + * (metadata locations for a specific VG) from the command's + * initial label scan. The info is passed indirectly via + * lvmcache info/vginfo structs, which are created by the + * label scan and then copied into fid by create_instance(). + * + * FIXME: just use the vginfo/info->mdas lists directly instead + * of copying them into the fid list. + */ + + fic.type = FMT_INSTANCE_MDAS | FMT_INSTANCE_AUX_MDAS; + fic.context.vg_ref.vg_name = vgname; + fic.context.vg_ref.vg_id = vgid; + + /* + * Sets up the metadata areas that we need to read below. + * For each info in vginfo->infos, for each mda in info->mdas, + * (found during label_scan), copy the mda to fid->metadata_areas_in_use + */ + if (!(fid = fmt->ops->create_instance(fmt, &fic))) { + log_error("Failed to create format instance"); + return NULL; + } + + /* + * We use the fid globally here so prevent the release_vg + * call to destroy the fid - we may want to reuse it! + */ + fid->ref_count++; + + + /* + * label_scan found PVs for this VG and set up lvmcache to describe the + * VG/PVs that we use here to read the VG. It created 'vginfo' for the + * VG, and created an 'info' attached to vginfo for each PV. It also + * added a metadata_area struct to info->mdas for each metadata area it + * found on the PV. The info->mdas structs are copied to + * fid->metadata_areas_in_use by create_instance above, and here we + * read VG metadata from each of those mdas. + */ + dm_list_iterate_items(mda, &fid->metadata_areas_in_use) { + mda_dev = mda_get_device(mda); + + /* I don't think this can happen */ + if (!mda_dev) { + log_warn("Ignoring metadata for VG %s from missing dev.", vgname); + continue; + } + + use_previous_vg = 0; + + if (use_precommitted) { + log_debug_metadata("Reading VG %s precommit metadata from %s %llu", + vgname, dev_name(mda_dev), (unsigned long long)mda->header_start); + + vg = mda->ops->vg_read_precommit(fid, vgname, mda, &vg_fmtdata, &use_previous_vg); + + if (!vg && !use_previous_vg) { + log_warn("WARNING: Reading VG %s precommit on %s failed.", vgname, dev_name(mda_dev)); + vg_fmtdata = NULL; + continue; + } + } else { + log_debug_metadata("Reading VG %s metadata from %s %llu", + vgname, dev_name(mda_dev), (unsigned long long)mda->header_start); + + vg = mda->ops->vg_read(fid, vgname, mda, &vg_fmtdata, &use_previous_vg); + + if (!vg && !use_previous_vg) { + log_warn("WARNING: Reading VG %s on %s failed.", vgname, dev_name(mda_dev)); + vg_fmtdata = NULL; + continue; + } + } + + if (!vg) + continue; + + if (vg && !vg_ret) { + vg_ret = vg; + dev_ret = mda_dev; + continue; + } + + /* + * Use the newest copy of the metadata found on any mdas. + * Above, We could check if the scan found an old metadata + * seqno in this mda and just skip reading it again; then these + * seqno checks would just be sanity checks. + */ + + if (vg->seqno == vg_ret->seqno) { + release_vg(vg); + continue; + } + + if (vg->seqno > vg_ret->seqno) { + log_warn("WARNING: ignoring metadata seqno %u on %s for seqno %u on %s for VG %s.", + vg_ret->seqno, dev_name(dev_ret), + vg->seqno, dev_name(mda_dev), vg->name); + found_old_metadata = 1; + release_vg(vg_ret); + vg_ret = vg; + dev_ret = mda_dev; + vg_fmtdata = NULL; + continue; + } + + if (vg_ret->seqno > vg->seqno) { + log_warn("WARNING: ignoring metadata seqno %u on %s for seqno %u on %s for VG %s.", + vg->seqno, dev_name(mda_dev), + vg_ret->seqno, dev_name(dev_ret), vg->name); + found_old_metadata = 1; + release_vg(vg); + vg_fmtdata = NULL; + continue; + } + } + + if (found_old_metadata) + log_warn("WARNING: Inconsistent metadata found for VG %s", vgname); + + vg = NULL; + + if (vg_ret) + set_pv_devices(fid, vg_ret); + + fid->ref_count--; + + if (!vg_ret) { + _destroy_fid(&fid); + goto_out; + } + + /* + * Correct the lvmcache representation of the VG using the metadata + * that we have chosen above (vg_ret). + * + * The vginfo/info representation created by label_scan was not + * entirely correct since it did not use the full or final metadata. + * + * In lvmcache, PVs with no mdas were not attached to the vginfo during + * label_scan because label_scan didn't know where they should go. Now + * that we have the VG metadata we can tell, so use that to attach those + * info's to the vginfo. + * + * Also, outdated PVs that have been removed from the VG were incorrectly + * attached to the vginfo during label_scan, and now need to be detached. + */ + lvmcache_update_vg_from_read(vg_ret, vg_ret->status & PRECOMMITTED); + + /* + * lvmcache_update_vg identified outdated mdas that we read above that + * are not actually part of the VG. Remove those outdated mdas from + * the fid's list of mdas. + */ + dm_list_iterate_items_safe(mda, mda2, &fid->metadata_areas_in_use) { + mda_dev = mda_get_device(mda); + if (lvmcache_is_outdated_dev(cmd, vg_ret->name, (const char *)&vg_ret->id, mda_dev)) { + log_debug_metadata("vg_read %s ignore mda for outdated dev %s", + vg_ret->name, dev_name(mda_dev)); + dm_list_del(&mda->list); + } + } + +out: + return vg_ret; +} + +struct volume_group *vg_read(struct cmd_context *cmd, const char *vg_name, const char *vgid, + uint32_t read_flags, uint32_t lockd_state, + uint32_t *error_flags, struct volume_group **error_vg) +{ + char uuidstr[64] __attribute__((aligned(8))); + struct volume_group *vg = NULL; + struct lv_list *lvl; + struct pv_list *pvl; + int missing_pv_dev = 0; + int missing_pv_flag = 0; + uint32_t failure = 0; + int writing = (read_flags & READ_FOR_UPDATE); + + if (is_orphan_vg(vg_name)) { + log_very_verbose("Reading orphan VG %s", vg_name); + vg = vg_read_orphans(cmd, vg_name); + *error_flags = 0; + *error_vg = NULL; + return vg; + } + + if (!validate_name(vg_name)) { + log_error("Volume group name \"%s\" has invalid characters.", vg_name); + return NULL; + } + + if (!lock_vol(cmd, vg_name, writing ? LCK_VG_WRITE : LCK_VG_READ, NULL)) { + log_error("Can't get lock for %s", vg_name); + failure |= FAILED_LOCKING; + goto_bad; + } + + if (!(vg = _vg_read(cmd, vg_name, vgid, 0))) { + /* Some callers don't care if the VG doesn't exist and don't want an error message. */ + if (!(read_flags & READ_OK_NOTFOUND)) + log_error("Volume group \"%s\" not found", vg_name); + failure |= FAILED_NOTFOUND; + goto_bad; + } + + /* + * Check and warn if PV ext info is not in sync with VG metadata + * (vg_write fixes.) + */ + _check_pv_ext(cmd, vg); + + if (!vg_strip_outdated_historical_lvs(vg)) + log_warn("WARNING: failed to strip outdated historical lvs."); + + /* + * Check for missing devices in the VG. In most cases a VG cannot be + * changed while it's missing devices. This restriction is implemented + * here in vg_read. Below we return an error from vg_read if the + * vg_read flag indicates that the command is going to modify the VG. + * (We should probably implement this restriction elsewhere instead of + * returning an error from vg_read.) + * + * The PV's device may be present while the PV for the device has the + * MISSING_PV flag set in the metadata. This happened because the VG + * was written while this dev was missing, so the MISSING flag was + * written in the metadata for PV. Now the device has reappeared. + * However, the VG has changed since the device was last present, and + * if the device has outdated data it may not be safe to just start + * using it again. + * + * If there were no PE's used on the PV, we can just clear the MISSING + * flag, but if there were PE's used we need to continue to treat the + * PV as if the device is missing, limiting operations like the VG has + * a missing device, and requiring the user to remove the reappeared + * device from the VG, like a missing device, with vgreduce + * --removemissing. + */ + dm_list_iterate_items(pvl, &vg->pvs) { + if (!id_write_format(&pvl->pv->id, uuidstr, sizeof(uuidstr))) + uuidstr[0] = '\0'; + + if (!pvl->pv->dev) { + /* The obvious and common case of a missing device. */ + + log_warn("WARNING: VG %s is missing PV %s.", vg_name, uuidstr); + missing_pv_dev++; + + } else if (pvl->pv->status & MISSING_PV) { + /* A device that was missing but has reappeared. */ + + if (pvl->pv->pe_alloc_count == 0) { + log_warn("WARNING: VG %s has unused reappeared PV %s %s.", vg_name, dev_name(pvl->pv->dev), uuidstr); + pvl->pv->status &= ~MISSING_PV; + /* tell vgextend restoremissing that MISSING flag was cleared here */ + pvl->pv->unused_missing_cleared = 1; + } else { + log_warn("WARNING: VG %s was missing PV %s %s.", vg_name, dev_name(pvl->pv->dev), uuidstr); + missing_pv_flag++; + } + } + } + + if (missing_pv_dev || missing_pv_flag) + vg_mark_partial_lvs(vg, 1); + + if (!check_pv_segments(vg)) { + log_error(INTERNAL_ERROR "PV segments corrupted in %s.", vg->name); + failure |= FAILED_INTERNAL_ERROR; + goto_bad; + } + + dm_list_iterate_items(lvl, &vg->lvs) { + if (!check_lv_segments(lvl->lv, 0)) { + log_error(INTERNAL_ERROR "LV segments corrupted in %s.", lvl->lv->name); + failure |= FAILED_INTERNAL_ERROR; + goto_bad; + } + } + + dm_list_iterate_items(lvl, &vg->lvs) { + /* Checks that cross-reference other LVs. */ + if (!check_lv_segments(lvl->lv, 1)) { + log_error(INTERNAL_ERROR "LV segments corrupted in %s.", lvl->lv->name); + failure |= FAILED_INTERNAL_ERROR; + goto_bad; + } + } + + if (!check_pv_dev_sizes(vg)) + log_warn("WARNING: One or more devices used as PVs in VG %s have changed sizes.", vg->name); + + _check_devs_used_correspond_with_vg(vg); + + if (!_access_vg_lock_type(cmd, vg, lockd_state, &failure)) { + /* Either FAILED_LOCK_TYPE or FAILED_LOCK_MODE were set. */ + goto_bad; + } + + if (!_access_vg_systemid(cmd, vg)) { + failure |= FAILED_SYSTEMID; + goto_bad; + } + + if (!_access_vg_clustered(cmd, vg)) { + failure |= FAILED_CLUSTERED; + goto_bad; + } + + if (writing && !(read_flags & READ_ALLOW_EXPORTED) && vg_is_exported(vg)) { + log_error("Volume group %s is exported", vg->name); + failure |= FAILED_EXPORTED; + goto_bad; + } + + if (writing && !(vg->status & LVM_WRITE)) { + log_error("Volume group %s is read-only", vg->name); + failure |= FAILED_READ_ONLY; + goto_bad; + } + + if (!cmd->handles_missing_pvs && (missing_pv_dev || missing_pv_flag) && writing) { + log_error("Cannot change VG %s while PVs are missing.", vg->name); + log_error("See vgreduce --removemissing and vgextend --restoremissing."); + failure |= FAILED_NOT_ENABLED; + goto_bad; + } + + if (!cmd->handles_unknown_segments && vg_has_unknown_segments(vg) && writing) { + log_error("Cannot change VG %s with unknown segments in it!", vg->name); + failure |= FAILED_NOT_ENABLED; /* FIXME new failure code here? */ + goto_bad; + } + + /* + * When we are reading the VG with the intention of writing it, + * we save a second copy of the VG in vg->vg_committed. This + * copy remains unmodified by the command operation, and is used + * later if there is an error and we want to reactivate LVs. + * FIXME: be specific about exactly when this works correctly. + */ + if (writing) { + struct dm_config_tree *cft; + + if (dm_pool_locked(vg->vgmem)) { + /* FIXME: can this happen? */ + log_warn("WARNING: vg_read no vg copy: pool locked"); + goto out; + } + + if (vg->vg_committed) { + /* FIXME: can this happen? */ + log_warn("WARNING: vg_read no vg copy: copy exists"); + release_vg(vg->vg_committed); + vg->vg_committed = NULL; + } + + if (vg->vg_precommitted) { + /* FIXME: can this happen? */ + log_warn("WARNING: vg_read no vg copy: pre copy exists"); + release_vg(vg->vg_precommitted); + vg->vg_precommitted = NULL; + } + + if (!(cft = export_vg_to_config_tree(vg))) { + log_warn("WARNING: vg_read no vg copy: copy export failed"); + goto out; + } + + if (!(vg->vg_committed = import_vg_from_config_tree(cft, vg->fid))) + log_warn("WARNING: vg_read no vg copy: copy import failed"); + + dm_config_destroy(cft); + } else { + if (vg->vg_precommitted) + log_error(INTERNAL_ERROR "vg_read vg %p vg_precommitted %p", vg, vg->vg_precommitted); + if (vg->vg_committed) + log_error(INTERNAL_ERROR "vg_read vg %p vg_committed %p", vg, vg->vg_committed); + } +out: + /* We return with the VG lock held when read is successful. */ + *error_flags = SUCCESS; + if (error_vg) + *error_vg = NULL; + return vg; + +bad: + *error_flags = failure; + + /* + * FIXME: get rid of this case so we don't have to return the vg when + * there's an error. It is here for process_each_pv() which wants to + * eliminate the VG's devs from the list of devs it is processing, even + * when it can't access the VG because of wrong system id or similar. + * This could be done by looking at lvmcache info structs intead of 'vg'. + * It's also used by process_each_vg/process_each_lv which want to + * include error_vg values (like system_id) in error messages. + * These values could also be found from lvmcache vginfo. + */ + if (error_vg && vg) { + if (vg->vg_precommitted) + log_error(INTERNAL_ERROR "vg_read vg %p vg_precommitted %p", vg, vg->vg_precommitted); + if (vg->vg_committed) + log_error(INTERNAL_ERROR "vg_read vg %p vg_committed %p", vg, vg->vg_committed); + + /* caller must unlock_vg and release_vg */ + *error_vg = vg; + return_NULL; + } + + if (vg) { + unlock_vg(cmd, vg, vg_name); + release_vg(vg); + } + if (error_vg) + *error_vg = NULL; + return_NULL; +} + +/* + * Simply a version of vg_read() that automatically sets the READ_FOR_UPDATE + * flag, which means the caller intends to write the VG after reading it, + * so vg_read should acquire an exclusive file lock on the vg. + */ +struct volume_group *vg_read_for_update(struct cmd_context *cmd, const char *vg_name, + const char *vgid, uint32_t read_flags, uint32_t lockd_state) +{ + struct volume_group *vg; + uint32_t error_flags = 0; + + vg = vg_read(cmd, vg_name, vgid, read_flags | READ_FOR_UPDATE, lockd_state, &error_flags, NULL); + + return vg; +} diff --git a/lib/metadata/pv.h b/lib/metadata/pv.h index c162acd34..3430c2e1f 100644 --- a/lib/metadata/pv.h +++ b/lib/metadata/pv.h @@ -59,6 +59,7 @@ struct physical_volume { /* This is true whenever the represented PV has a label associated. */ uint64_t is_labelled:1; + uint64_t unused_missing_cleared:1; /* NB. label_sector is valid whenever is_labelled is true */ uint64_t label_sector; diff --git a/lib/metadata/vg.c b/lib/metadata/vg.c index aca4fe783..beddf73de 100644 --- a/lib/metadata/vg.c +++ b/lib/metadata/vg.c @@ -84,7 +84,7 @@ static void _free_vg(struct volume_group *vg) void release_vg(struct volume_group *vg) { - if (!vg || (vg->fid && vg == vg->fid->fmt->orphan_vg)) + if (!vg || is_orphan_vg(vg->name)) return; release_vg(vg->vg_committed); @@ -711,9 +711,9 @@ int vgreduce_single(struct cmd_context *cmd, struct volume_group *vg, vg->extent_count -= pv_pe_count(pv); /* FIXME: we don't need to vg_read the orphan vg here */ - orphan_vg = vg_read_orphans(cmd, 0, vg->fid->fmt->orphan_vg_name); + orphan_vg = vg_read_orphans(cmd, vg->fid->fmt->orphan_vg_name); - if (vg_read_error(orphan_vg)) + if (!orphan_vg) goto bad; if (!vg_split_mdas(cmd, vg, orphan_vg) || !vg->pv_count) { diff --git a/lib/metadata/vg.h b/lib/metadata/vg.h index 3fd47569d..6e89b3375 100644 --- a/lib/metadata/vg.h +++ b/lib/metadata/vg.h @@ -122,11 +122,6 @@ struct volume_group { struct dm_list removed_pvs; uint32_t open_mode; /* FIXME: read or write - check lock type? */ - /* - * Store result of the last vg_read(). - * 0 for success else appropriate FAILURE_* bits set. - */ - uint32_t read_status; uint32_t mda_copies; /* target number of mdas for this VG */ struct dm_hash_table *hostnames; /* map of creation hostnames */ diff --git a/test/shell/inconsistent-metadata.sh b/test/shell/inconsistent-metadata.sh index b42715de4..e5ccf0cec 100644 --- a/test/shell/inconsistent-metadata.sh +++ b/test/shell/inconsistent-metadata.sh @@ -24,53 +24,48 @@ lvchange -a n $vg/mirror aux backup_dev "${DEVICES[@]}" -init() { +makeold() { + # reset metadata on all devs to starting condition aux restore_dev "${DEVICES[@]}" not check lv_field $vg/resized lv_size "8.00m" + # change the metadata on all devs lvresize -L 8192K $vg/resized + # reset metadata on just dev1 to the previous version aux restore_dev "$dev1" } -init -vgscan 2>&1 | tee cmd.out -grep "Inconsistent metadata found for VG $vg" cmd.out -vgscan 2>&1 | tee cmd.out -not grep "Inconsistent metadata found for VG $vg" cmd.out -check lv_field $vg/resized lv_size "8.00m" +# create old metadata +makeold -# vgdisplay fixes -init -vgdisplay $vg 2>&1 | tee cmd.out -grep "Inconsistent metadata found for VG $vg" cmd.out -vgdisplay $vg 2>&1 | tee cmd.out -not grep "Inconsistent metadata found for VG $vg" cmd.out -check lv_field $vg/resized lv_size "8.00m" - -# lvs fixes up -init -lvs $vg 2>&1 | tee cmd.out -grep "Inconsistent metadata found for VG $vg" cmd.out -vgdisplay $vg 2>&1 | tee cmd.out -not grep "Inconsistent metadata found for VG $vg" cmd.out -check lv_field $vg/resized lv_size "8.00m" - -# vgs fixes up as well -init +# reports old metadata vgs $vg 2>&1 | tee cmd.out -grep "Inconsistent metadata found for VG $vg" cmd.out -vgs $vg 2>&1 | tee cmd.out -not grep "Inconsistent metadata found for VG $vg" cmd.out +grep "ignoring metadata" cmd.out check lv_field $vg/resized lv_size "8.00m" -echo Check auto-repair of failed vgextend - metadata written to original pv but not new pv +# corrects old metadata +lvcreate -l1 -an $vg + +# no old report +vgs $vg 2>&1 | tee cmd.out +not grep "ignoring metadata" cmd.out +check lv_field $vg/resized lv_size "8.00m" + + +echo Check auto-repair of failed vgextend +echo - metadata written to original pv but not new pv + vgremove -f $vg pvremove -ff "${DEVICES[@]}" pvcreate "${DEVICES[@]}" + aux backup_dev "$dev2" vgcreate $SHARED $vg "$dev1" vgextend $vg "$dev2" aux restore_dev "$dev2" -vgscan + +vgs -o+vg_mda_count $vg +pvs -o+vg_mda_count + should check compare_fields vgs $vg vg_mda_count pvs "$dev2" vg_mda_count vgremove -ff $vg diff --git a/test/shell/lvconvert-repair-cache.sh b/test/shell/lvconvert-repair-cache.sh index 348dbaf31..49cfbb1f9 100644 --- a/test/shell/lvconvert-repair-cache.sh +++ b/test/shell/lvconvert-repair-cache.sh @@ -93,6 +93,9 @@ lvconvert --yes --uncache $vg/$lv1 aux enable_dev "$dev2" +# vg was changed while dev was missing +vgextend --restoremissing $vg "$dev2" + # FIXME: temporary workaround lvcreate -L1 -n $lv5 $vg lvremove -ff $vg diff --git a/test/shell/lvconvert-repair-policy.sh b/test/shell/lvconvert-repair-policy.sh index f9fca0028..b69658ea6 100644 --- a/test/shell/lvconvert-repair-policy.sh +++ b/test/shell/lvconvert-repair-policy.sh @@ -24,6 +24,8 @@ aux lvmconf 'allocation/maximise_cling = 0' \ cleanup_() { vgreduce --removemissing $vg for d in "$@"; do aux enable_dev "$d"; done + # clear the outdated metadata on enabled devs before we can reuse them + vgck --updatemetadata $vg for d in "$@"; do vgextend $vg "$d"; done lvremove -ff $vg/mirror lvcreate -aey --type mirror -m 1 --ignoremonitoring -l 2 -n mirror $vg "$dev1" "$dev2" "$dev3:0" diff --git a/test/shell/lvconvert-repair-raid.sh b/test/shell/lvconvert-repair-raid.sh index 2f417603d..711f386fd 100644 --- a/test/shell/lvconvert-repair-raid.sh +++ b/test/shell/lvconvert-repair-raid.sh @@ -65,6 +65,7 @@ aux disable_dev "$dev2" lvconvert -y --repair $vg/$lv1 vgreduce --removemissing $vg aux enable_dev "$dev2" +vgck --updatemetadata $vg vgextend $vg "$dev2" lvremove -ff $vg/$lv1 @@ -80,6 +81,7 @@ aux wait_for_sync $vg $lv1 lvconvert -y --repair $vg/$lv1 vgreduce --removemissing $vg aux enable_dev "$dev2" "$dev3" +vgck --updatemetadata $vg vgextend $vg "$dev2" "$dev3" lvremove -ff $vg/$lv1 @@ -96,6 +98,7 @@ aux disable_dev "$dev3" vgreduce --removemissing -f $vg lvconvert -y --repair $vg/$lv1 aux enable_dev "$dev3" +vgck --updatemetadata $vg pvcreate -yff "$dev3" vgextend $vg "$dev3" lvremove -ff $vg/$lv1 @@ -114,6 +117,7 @@ aux wait_for_sync $vg $lv1 lvconvert -y --repair $vg/$lv1 vgreduce --removemissing $vg aux enable_dev "$dev3" +vgck --updatemetadata $vg vgextend $vg "$dev3" lvremove -ff $vg/$lv1 @@ -128,6 +132,7 @@ aux disable_dev "$dev4" "$dev5" lvconvert -y --repair $vg/$lv1 vgreduce --removemissing $vg aux enable_dev "$dev4" "$dev5" +vgck --updatemetadata $vg vgextend $vg "$dev4" "$dev5" lvremove -ff $vg/$lv1 @@ -145,6 +150,7 @@ aux wait_for_sync $vg $lv1 lvconvert -y --repair $vg/$lv1 vgreduce --removemissing $vg aux enable_dev "$dev4" +vgck --updatemetadata $vg vgextend $vg "$dev4" lvremove -ff $vg/$lv1 @@ -163,6 +169,7 @@ aux wait_for_sync $vg $lv1 lvconvert -y --repair $vg/$lv1 vgreduce --removemissing $vg aux enable_dev "$dev4" +vgck --updatemetadata $vg vgextend $vg "$dev4" lvremove -ff $vg/$lv1 diff --git a/test/shell/lvconvert-repair.sh b/test/shell/lvconvert-repair.sh index ae8fa7e98..0d0231e30 100644 --- a/test/shell/lvconvert-repair.sh +++ b/test/shell/lvconvert-repair.sh @@ -106,17 +106,23 @@ lvconvert -y --repair $vg/mirror vgreduce --removemissing $vg aux enable_dev "$dev1" +# clear the outdated dev before we can reuse it +vgck --updatemetadata $vg vgextend $vg "$dev1" aux disable_dev "$dev2" lvconvert -y --repair $vg/mirror vgreduce --removemissing $vg aux enable_dev "$dev2" +# clear the outdated dev before we can reuse it +vgck --updatemetadata $vg vgextend $vg "$dev2" aux disable_dev "$dev3" lvconvert -y --repair $vg/mirror vgreduce --removemissing $vg aux enable_dev "$dev3" +# clear the outdated dev before we can reuse it +vgck --updatemetadata $vg vgextend $vg "$dev3" vgremove -ff $vg diff --git a/test/shell/lvmcache-exercise.sh b/test/shell/lvmcache-exercise.sh index 908cdf813..8cfb7f16e 100644 --- a/test/shell/lvmcache-exercise.sh +++ b/test/shell/lvmcache-exercise.sh @@ -43,14 +43,16 @@ pvs "$dev1" lvcreate -aey -m2 --type mirror -l4 --alloc anywhere --corelog -n $lv1 $vg2 aux disable_dev "$dev3" + +pvs 2>&1| tee out +grep "is missing PV" out + lvconvert --yes --repair $vg2/$lv1 + aux enable_dev "$dev3" -# here it should fix any reappeared devices -lvs - lvs -a $vg2 -o+devices 2>&1 | tee out -not grep reappeared out +not grep "is missing PV" out # This removes the first "vg1" using its uuid vgremove -ff -S vg_uuid=$UUID1 diff --git a/test/shell/mirror-vgreduce-removemissing.sh b/test/shell/mirror-vgreduce-removemissing.sh index d95a0ac7b..f2f7c1973 100644 --- a/test/shell/mirror-vgreduce-removemissing.sh +++ b/test/shell/mirror-vgreduce-removemissing.sh @@ -123,8 +123,16 @@ check_and_cleanup_lvs_() recover_vg_() { aux enable_dev "$@" + + # clear outdated metadata on PVs so they can be used again + vgck --updatemetadata $vg + + pvscan --cache + pvcreate -ff "$@" + # wipefs -a "$@" vgextend $vg "$@" + check_and_cleanup_lvs_ } diff --git a/test/shell/pv-ext-flags.sh b/test/shell/pv-ext-flags.sh index bae16df11..22e9b3aac 100644 --- a/test/shell/pv-ext-flags.sh +++ b/test/shell/pv-ext-flags.sh @@ -39,7 +39,6 @@ check pv_field "$dev2" pv_in_use "used" # disable $dev2 and dev1 with 0 MDAs remains, but still # marked as used, so pvcreate/vgcreate/pvremove should fail aux disable_dev "$dev2" -pvscan --cache check pv_field "$dev1" pv_in_use "used" not pvcreate "$dev1" 2>err @@ -71,20 +70,14 @@ vgcreate $vg1 "$dev1" "$dev2" # disable $dev1, then repair the VG - $dev1 is removed from VG aux disable_dev "$dev1" vgreduce --removemissing $vg1 -# now, enable $dev1, automatic repair will happen on pvs call -# (or any other lvm command that does vg_read with repair inside) -aux enable_dev "$dev1" -# FIXME: once persistent cache does not cause races with timestamps -# causing LVM tools to not see the VG inconsistency and once -# VG repair is always done, delete this line which removes -# persistent .cache as a workaround -rm -f "$TESTDIR/etc/.cache" +# now, enable $dev1 and clear the old metadata from it +aux enable_dev "$dev1" +vgck --updatemetadata $vg1 vgck $vg1 -# check $dev1 does not contain the PV_EXT_FLAG anymore - it -# should be removed as part of the repaid during vg_read since -# $dev1 is not part of $vg1 anymore + +# check $dev1 does not contain the PV_EXT_FLAG anymore check pv_field "$dev1" pv_in_use "" ############################################# @@ -105,7 +98,6 @@ check pv_field "$dev2" pv_in_use "used" pvchange --metadataignore y "$dev1" aux disable_dev "$dev2" -pvscan --cache check pv_field "$dev1" pv_in_use "used" not pvcreate "$dev1" 2>err @@ -136,20 +128,14 @@ vgcreate $vg1 "$dev1" "$dev2" # disable $dev1, then repair the VG - $dev1 is removed from VG aux disable_dev "$dev1" vgreduce --removemissing $vg1 -# now, enable $dev1, automatic repair will happen on pvs call -# (or any other lvm command that does vg_read with repair inside) -aux enable_dev "$dev1" -# FIXME: once persistent cache does not cause races with timestamps -# causing LVM tools to not see the VG inconsistency and once -# VG repair is always done, delete this line which removes -# persistent .cache as a workaround -rm -f "$TESTDIR/etc/.cache" +# now, enable $dev1 and clear the old metadata from it +aux enable_dev "$dev1" +vgck --updatemetadata $vg1 vgck $vg1 -# check $dev1 does not contain the PV_EXT_FLAG anymore - it -# should be removed as part of the repaid during vg_read since -# $dev1 is not part of $vg1 anymore + +# check $dev1 does not contain the PV_EXT_FLAG anymore check pv_field "$dev1" pv_in_use "" ########################### diff --git a/test/shell/unlost-pv.sh b/test/shell/unlost-pv.sh index edf7f31e2..50f89287e 100644 --- a/test/shell/unlost-pv.sh +++ b/test/shell/unlost-pv.sh @@ -15,47 +15,59 @@ SKIP_WITH_LVMPOLLD=1 . lib/inittest -check_() { - local cache="" - # vgscan needs --cache option for direct scan if lvmetad is used - test -e LOCAL_LVMETAD && cache="--cache" - vgscan $cache 2>&1 | tee vgscan.out - "$@" grep "Inconsistent metadata found for VG $vg" vgscan.out -} - aux prepare_vg 3 lvcreate -an -Zn --type mirror -m 1 -l 1 -n mirror $vg -#lvchange -a n $vg # try orphaning a missing PV (bz45867) aux disable_dev "$dev1" vgreduce --removemissing --force $vg aux enable_dev "$dev1" -check_ -test -e LOCAL_LVMETAD && pvcreate -f "$dev1" -check_ not +vgscan 2>&1 | tee vgscan.out +grep "Inconsistent metadata found for VG $vg" vgscan.out + +# erase outdated dev1 +vgck --updatemetadata $vg + +vgscan 2>&1 | tee vgscan.out +not grep "Inconsistent metadata found for VG $vg" vgscan.out + -# try to just change metadata; we expect the new version (with MISSING_PV set -# on the reappeared volume) to be written out to the previously missing PV vgextend $vg "$dev1" + lvcreate -l 1 -n boo -a n --zero n $vg -aux disable_dev "$dev1" -lvremove $vg/mirror -aux enable_dev "$dev1" -check_ -test -e LOCAL_LVMETAD && lvremove $vg/boo # FIXME trigger a write :-( -check_ not aux disable_dev "$dev1" + +lvremove $vg/mirror + +aux enable_dev "$dev1" + +vgscan 2>&1 | tee vgscan.out +grep "Inconsistent metadata found for VG $vg" vgscan.out + +# write the vg to update the metadata on dev1 +vgck --updatemetadata $vg + +vgscan 2>&1 | tee vgscan.out +not grep "Inconsistent metadata found for VG $vg" vgscan.out + +aux disable_dev "$dev1" + vgreduce --removemissing --force $vg + aux enable_dev "$dev1" vgscan 2>&1 | tee out -grep 'Removing PV' out -vgs 2>&1 | tee out -not grep 'Removing PV' out +vgscan 2>&1 | tee vgscan.out +grep "Inconsistent metadata found for VG $vg" vgscan.out + +# erase outdated dev1 +vgck --updatemetadata $vg + +vgscan 2>&1 | tee vgscan.out +not grep "Inconsistent metadata found for VG $vg" vgscan.out vgremove -ff $vg diff --git a/test/shell/vgck.sh b/test/shell/vgck.sh index 3288d1ba6..b6c2cba08 100644 --- a/test/shell/vgck.sh +++ b/test/shell/vgck.sh @@ -24,11 +24,11 @@ dd if=/dev/urandom bs=512 seek=2 count=32 of="$dev2" vgscan 2>&1 | tee vgscan.out || true -grep "Failed" vgscan.out +grep "checksum" vgscan.out dd if=/dev/urandom bs=512 seek=2 count=32 of="$dev2" vgck $vg 2>&1 | tee vgck.out || true -grep Incorrect vgck.out +grep "checksum" vgck.out vgremove -ff $vg diff --git a/tools/polldaemon.c b/tools/polldaemon.c index 528014ce2..b0294ebb3 100644 --- a/tools/polldaemon.c +++ b/tools/polldaemon.c @@ -148,6 +148,7 @@ int wait_for_single_lv(struct cmd_context *cmd, struct poll_operation_id *id, struct logical_volume *lv; int finished = 0; uint32_t lockd_state = 0; + uint32_t error_flags = 0; int ret; if (!parms->wait_before_testing) @@ -168,12 +169,10 @@ int wait_for_single_lv(struct cmd_context *cmd, struct poll_operation_id *id, } /* Locks the (possibly renamed) VG again */ - vg = vg_read(cmd, id->vg_name, NULL, READ_FOR_UPDATE, lockd_state); - if (vg_read_error(vg)) { + vg = vg_read(cmd, id->vg_name, NULL, READ_FOR_UPDATE, lockd_state, &error_flags, NULL); + if (!vg) { /* What more could we do here? */ - log_error("ABORTING: Can't reread VG for %s.", id->display_name); - release_vg(vg); - vg = NULL; + log_error("ABORTING: Can't reread VG for %s error flags %x.", id->display_name, error_flags); ret = 0; goto out; } @@ -395,6 +394,7 @@ static int _report_progress(struct cmd_context *cmd, struct poll_operation_id *i struct volume_group *vg; struct logical_volume *lv; uint32_t lockd_state = 0; + uint32_t error_flags = 0; int ret; /* @@ -407,10 +407,9 @@ static int _report_progress(struct cmd_context *cmd, struct poll_operation_id *i * change done locally. */ - vg = vg_read(cmd, id->vg_name, NULL, 0, lockd_state); - if (vg_read_error(vg)) { - release_vg(vg); - log_error("Can't reread VG for %s", id->display_name); + vg = vg_read(cmd, id->vg_name, NULL, 0, lockd_state, &error_flags, NULL); + if (!vg) { + log_error("Can't reread VG for %s error flags %x", id->display_name, error_flags); ret = 0; goto out_ret; } diff --git a/tools/toollib.c b/tools/toollib.c index 78b174553..a3b5fea8b 100644 --- a/tools/toollib.c +++ b/tools/toollib.c @@ -189,11 +189,12 @@ static int _printed_clustered_vg_advice = 0; * Case c covers the other errors returned when reading the VG. * If *skip is 1, it's OK for the caller to read the list of PVs in the VG. */ -static int _ignore_vg(struct volume_group *vg, const char *vg_name, - struct dm_list *arg_vgnames, uint32_t read_flags, - int *skip, int *notfound) +static int _ignore_vg(struct cmd_context *cmd, + uint32_t error_flags, struct volume_group *error_vg, + const char *vg_name, struct dm_list *arg_vgnames, + uint32_t read_flags, int *skip, int *notfound) { - uint32_t read_error = vg_read_error(vg); + uint32_t read_error = error_flags; *skip = 0; *notfound = 0; @@ -203,12 +204,9 @@ static int _ignore_vg(struct volume_group *vg, const char *vg_name, return 0; } - if ((read_error & FAILED_INCONSISTENT) && (read_flags & READ_ALLOW_INCONSISTENT)) - read_error &= ~FAILED_INCONSISTENT; /* Check for other errors */ - if (read_error & FAILED_CLUSTERED) { - if (arg_vgnames && str_list_match_item(arg_vgnames, vg->name)) { - log_error("Cannot access clustered VG %s.", vg->name); + if (arg_vgnames && str_list_match_item(arg_vgnames, vg_name)) { + log_error("Cannot access clustered VG %s.", vg_name); if (!_printed_clustered_vg_advice) { _printed_clustered_vg_advice = 1; log_error("See lvmlockd(8) for changing a clvm/clustered VG to a shared VG."); @@ -233,10 +231,13 @@ static int _ignore_vg(struct volume_group *vg, const char *vg_name, * would expect to fail. */ if (read_error & FAILED_SYSTEMID) { - if (arg_vgnames && str_list_match_item(arg_vgnames, vg->name)) { + if (arg_vgnames && str_list_match_item(arg_vgnames, vg_name)) { log_error("Cannot access VG %s with system ID %s with %slocal system ID%s%s.", - vg->name, vg->system_id, vg->cmd->system_id ? "" : "unknown ", - vg->cmd->system_id ? " " : "", vg->cmd->system_id ? vg->cmd->system_id : ""); + vg_name, + error_vg ? error_vg->system_id : "unknown ", + cmd->system_id ? "" : "unknown ", + cmd->system_id ? " " : "", + cmd->system_id ? cmd->system_id : ""); return 1; } else { read_error &= ~FAILED_SYSTEMID; /* Check for other errors */ @@ -255,10 +256,11 @@ static int _ignore_vg(struct volume_group *vg, const char *vg_name, * command failed to acquire the necessary lock.) */ if (read_error & (FAILED_LOCK_TYPE | FAILED_LOCK_MODE)) { - if (arg_vgnames && str_list_match_item(arg_vgnames, vg->name)) { + if (arg_vgnames && str_list_match_item(arg_vgnames, vg_name)) { if (read_error & FAILED_LOCK_TYPE) log_error("Cannot access VG %s with lock type %s that requires lvmlockd.", - vg->name, vg->lock_type); + vg_name, + error_vg ? error_vg->lock_type : "unknown"); /* For FAILED_LOCK_MODE, the error is printed in vg_read. */ return 1; } else { @@ -1924,10 +1926,12 @@ static int _process_vgnameid_list(struct cmd_context *cmd, uint32_t read_flags, log_report_t saved_log_report_state = log_get_report_state(); char uuid[64] __attribute__((aligned(8))); struct volume_group *vg; + struct volume_group *error_vg = NULL; struct vgnameid_list *vgnl; const char *vg_name; const char *vg_uuid; uint32_t lockd_state = 0; + uint32_t error_flags = 0; int whole_selected = 0; int ret_max = ECMD_PROCESSED; int ret; @@ -1977,13 +1981,18 @@ static int _process_vgnameid_list(struct cmd_context *cmd, uint32_t read_flags, continue; } - vg = vg_read(cmd, vg_name, vg_uuid, read_flags, lockd_state); - if (_ignore_vg(vg, vg_name, arg_vgnames, read_flags, &skip, ¬found)) { + vg = vg_read(cmd, vg_name, vg_uuid, read_flags, lockd_state, &error_flags, &error_vg); + if (_ignore_vg(cmd, error_flags, error_vg, vg_name, arg_vgnames, read_flags, &skip, ¬found)) { stack; ret_max = ECMD_FAILED; report_log_ret_code(ret_max); + if (error_vg) + unlock_and_release_vg(cmd, error_vg, vg_name); goto endvg; } + if (error_vg) + unlock_and_release_vg(cmd, error_vg, vg_name); + if (skip || notfound) goto endvg; @@ -2004,8 +2013,7 @@ static int _process_vgnameid_list(struct cmd_context *cmd, uint32_t read_flags, ret_max = ret; } - if (!vg_read_error(vg)) - unlock_vg(cmd, vg, vg_name); + unlock_vg(cmd, vg, vg_name); endvg: release_vg(vg); if (!lockd_vg(cmd, vg_name, "un", 0, &lockd_state)) @@ -3590,11 +3598,13 @@ static int _process_lv_vgnameid_list(struct cmd_context *cmd, uint32_t read_flag log_report_t saved_log_report_state = log_get_report_state(); char uuid[64] __attribute__((aligned(8))); struct volume_group *vg; + struct volume_group *error_vg = NULL; struct vgnameid_list *vgnl; struct dm_str_list *sl; struct dm_list *tags_arg; struct dm_list lvnames; uint32_t lockd_state = 0; + uint32_t error_flags = 0; const char *vg_name; const char *vg_uuid; const char *vgn; @@ -3663,13 +3673,18 @@ static int _process_lv_vgnameid_list(struct cmd_context *cmd, uint32_t read_flag continue; } - vg = vg_read(cmd, vg_name, vg_uuid, read_flags, lockd_state); - if (_ignore_vg(vg, vg_name, arg_vgnames, read_flags, &skip, ¬found)) { + vg = vg_read(cmd, vg_name, vg_uuid, read_flags, lockd_state, &error_flags, &error_vg); + if (_ignore_vg(cmd, error_flags, error_vg, vg_name, arg_vgnames, read_flags, &skip, ¬found)) { stack; ret_max = ECMD_FAILED; report_log_ret_code(ret_max); + if (error_vg) + unlock_and_release_vg(cmd, error_vg, vg_name); goto endvg; } + if (error_vg) + unlock_and_release_vg(cmd, error_vg, vg_name); + if (skip || notfound) goto endvg; @@ -4294,10 +4309,12 @@ static int _process_pvs_in_vgs(struct cmd_context *cmd, uint32_t read_flags, log_report_t saved_log_report_state = log_get_report_state(); char uuid[64] __attribute__((aligned(8))); struct volume_group *vg; + struct volume_group *error_vg; struct vgnameid_list *vgnl; const char *vg_name; const char *vg_uuid; uint32_t lockd_state = 0; + uint32_t error_flags = 0; int ret_max = ECMD_PROCESSED; int ret; int skip; @@ -4335,8 +4352,8 @@ static int _process_pvs_in_vgs(struct cmd_context *cmd, uint32_t read_flags, log_debug("Processing PVs in VG %s", vg_name); - vg = vg_read(cmd, vg_name, vg_uuid, read_flags, lockd_state); - if (_ignore_vg(vg, vg_name, NULL, read_flags, &skip, ¬found)) { + vg = vg_read(cmd, vg_name, vg_uuid, read_flags, lockd_state, &error_flags, &error_vg); + if (_ignore_vg(cmd, error_flags, error_vg, vg_name, NULL, read_flags, &skip, ¬found)) { stack; ret_max = ECMD_FAILED; report_log_ret_code(ret_max); @@ -4348,22 +4365,26 @@ static int _process_pvs_in_vgs(struct cmd_context *cmd, uint32_t read_flags, goto endvg; /* - * Don't continue when skip is set, because we need to remove - * vg->pvs entries from devices list. + * Don't call "continue" when skip is set, because we need to remove + * error_vg->pvs entries from devices list. */ - ret = _process_pvs_in_vg(cmd, vg, all_devices, arg_devices, arg_tags, + ret = _process_pvs_in_vg(cmd, vg ? vg : error_vg, all_devices, arg_devices, arg_tags, process_all_pvs, process_all_devices, skip, handle, process_single_pv); if (ret != ECMD_PROCESSED) stack; + report_log_ret_code(ret); + if (ret > ret_max) ret_max = ret; if (!skip) unlock_vg(cmd, vg, vg->name); endvg: + if (error_vg) + unlock_and_release_vg(cmd, error_vg, vg_name); release_vg(vg); if (!lockd_vg(cmd, vg_name, "un", 0, &lockd_state)) stack; @@ -5584,7 +5605,7 @@ do_command: if (pp->preserve_existing && pp->orphan_vg_name) { log_debug("Using existing orphan PVs in %s.", pp->orphan_vg_name); - if (!(orphan_vg = vg_read_orphans(cmd, 0, pp->orphan_vg_name))) { + if (!(orphan_vg = vg_read_orphans(cmd, pp->orphan_vg_name))) { log_error("Cannot read orphans VG %s.", pp->orphan_vg_name); goto bad; } diff --git a/tools/vgcfgbackup.c b/tools/vgcfgbackup.c index 5a0fb13fa..49779cabe 100644 --- a/tools/vgcfgbackup.c +++ b/tools/vgcfgbackup.c @@ -67,12 +67,6 @@ static int _vg_backup_single(struct cmd_context *cmd, const char *vg_name, if (!backup_to_file(filename, vg->cmd->cmd_line, vg)) return_ECMD_FAILED; } else { - if (vg_read_error(vg) == FAILED_INCONSISTENT) { - log_error("No backup taken: specify filename with -f " - "to backup an inconsistent VG"); - return ECMD_FAILED; - } - if (vg_missing_pv_count(vg)) { log_error("No backup taken: specify filename with -f to backup with missing PVs."); return ECMD_FAILED; @@ -116,7 +110,7 @@ int vgcfgbackup(struct cmd_context *cmd, int argc, char **argv) init_pvmove(1); - ret = process_each_vg(cmd, argc, argv, NULL, NULL, READ_ALLOW_INCONSISTENT, 0, + ret = process_each_vg(cmd, argc, argv, NULL, NULL, 0, 0, handle, &_vg_backup_single); free(last_filename); diff --git a/tools/vgextend.c b/tools/vgextend.c index e50a8182d..da768981d 100644 --- a/tools/vgextend.c +++ b/tools/vgextend.c @@ -28,16 +28,25 @@ static int _restore_pv(struct volume_group *vg, const char *pv_name) return 0; } - if (!(pvl->pv->status & MISSING_PV)) { - log_warn("WARNING: PV %s was not missing in VG %s", pv_name, vg->name); - return 0; - } - if (!pvl->pv->dev) { log_warn("WARNING: The PV %s is still missing.", pv_name); return 0; } + if (pvl->pv->status & MISSING_PV) + goto clear_flag; + + /* + * when the PV has no used PE's vg_read clears the MISSING_PV flag + * and sets this so we know. + */ + if (pvl->pv->unused_missing_cleared) + goto clear_flag; + + log_warn("WARNING: PV %s was not missing in VG %s", pv_name, vg->name); + return 0; + +clear_flag: pvl->pv->status &= ~MISSING_PV; return 1; } diff --git a/tools/vgsplit.c b/tools/vgsplit.c index 8dd81ad82..abf70130b 100644 --- a/tools/vgsplit.c +++ b/tools/vgsplit.c @@ -456,6 +456,7 @@ static struct volume_group *_vgsplit_to(struct cmd_context *cmd, int *existing_vg) { struct volume_group *vg_to = NULL; + int exists = 0; log_verbose("Checking for new volume group \"%s\"", vg_name_to); /* @@ -468,13 +469,13 @@ static struct volume_group *_vgsplit_to(struct cmd_context *cmd, * we obtained a WRITE lock and could not find the vgname in the * system. Thus, the split will be into a new VG. */ - vg_to = vg_lock_and_create(cmd, vg_name_to); - if (vg_read_error(vg_to) == FAILED_LOCKING) { + vg_to = vg_lock_and_create(cmd, vg_name_to, &exists); + if (!vg_to && !exists) { log_error("Can't get lock for %s", vg_name_to); release_vg(vg_to); return NULL; } - if (vg_read_error(vg_to) == FAILED_EXIST) { + if (!vg_to && exists) { *existing_vg = 1; release_vg(vg_to); vg_to = vg_read_for_update(cmd, vg_name_to, NULL, 0, 0); From a3a676e0e790b5e76aa57a4d2bb1dadedcf6bb5a Mon Sep 17 00:00:00 2001 From: David Teigland Date: Fri, 24 May 2019 12:23:08 -0500 Subject: [PATCH 17/22] metadata.c: removed unused code if 0 was placed around old vg_read code by the previous commit. --- lib/metadata/metadata.c | 1189 --------------------------------------- 1 file changed, 1189 deletions(-) diff --git a/lib/metadata/metadata.c b/lib/metadata/metadata.c index 90409b359..e0a51143a 100644 --- a/lib/metadata/metadata.c +++ b/lib/metadata/metadata.c @@ -413,50 +413,6 @@ int add_pv_to_vg(struct volume_group *vg, const char *pv_name, return 1; } -#if 0 -static int _copy_pv(struct dm_pool *pvmem, - struct physical_volume *pv_to, - struct physical_volume *pv_from) -{ - memcpy(pv_to, pv_from, sizeof(*pv_to)); - - /* We must use pv_set_fid here to update the reference counter! */ - pv_to->fid = NULL; - pv_set_fid(pv_to, pv_from->fid); - - if (!(pv_to->vg_name = dm_pool_strdup(pvmem, pv_from->vg_name))) - return_0; - - if (!str_list_dup(pvmem, &pv_to->tags, &pv_from->tags)) - return_0; - - if (!peg_dup(pvmem, &pv_to->segments, &pv_from->segments)) - return_0; - - return 1; -} - -static struct pv_list *_copy_pvl(struct dm_pool *pvmem, struct pv_list *pvl_from) -{ - struct pv_list *pvl_to = NULL; - - if (!(pvl_to = dm_pool_zalloc(pvmem, sizeof(*pvl_to)))) - return_NULL; - - if (!(pvl_to->pv = dm_pool_alloc(pvmem, sizeof(*pvl_to->pv)))) - goto_bad; - - if (!_copy_pv(pvmem, pvl_to->pv, pvl_from->pv)) - goto_bad; - - return pvl_to; - -bad: - dm_pool_free(pvmem, pvl_to); - return NULL; -} -#endif - static int _move_pv(struct volume_group *vg_from, struct volume_group *vg_to, const char *pv_name, int enforce_pv_from_source) { @@ -1008,38 +964,6 @@ static int _vg_update_embedded_copy(struct volume_group *vg, struct volume_group return 1; } -#if 0 -/* - * Create a (struct volume_group) volume group handle from a struct volume_group pointer and a - * possible failure code or zero for success. - */ -static struct volume_group *_vg_make_handle(struct cmd_context *cmd, - struct volume_group *vg, - uint32_t failure) -{ - /* Never return a cached VG structure for a failure */ - if (vg && vg->vginfo && failure != SUCCESS) { - release_vg(vg); - vg = NULL; - } - - if (!vg && !(vg = alloc_vg("vg_make_handle", cmd, NULL))) - return_NULL; - - vg->read_status = failure; - - /* - * If we hold a write lock and might be changing the VG contents, embed a pristine - * copy of the VG metadata for the activation code to use later - */ - if (vg->fid && !dm_pool_locked(vg->vgmem) && !vg->vg_committed && !is_orphan_vg(vg->name)) - if (vg_write_lock_held() && !_vg_update_embedded_copy(vg, &vg->vg_committed)) - vg->read_status |= FAILED_ALLOCATION; - - return vg; -} -#endif - int lv_has_unknown_segments(const struct logical_volume *lv) { struct lv_segment *seg; @@ -3254,108 +3178,12 @@ void vg_revert(struct volume_group *vg) } } -#if 0 -static int _check_mda_in_use(struct metadata_area *mda, void *_in_use) -{ - int *in_use = _in_use; - if (!mda_is_ignored(mda)) - *in_use = 1; - return 1; -} -#endif - struct _vg_read_orphan_baton { struct cmd_context *cmd; struct volume_group *vg; const struct format_type *fmt; }; -/* - * If we know that the PV is orphan, meaning there's at least one MDA on - * that PV which does not reference any VG and at the same time there's - * PV_EXT_USED flag set, we're certainly in an inconsistent state and we - * need to fix this. - * - * For example, such situation can happen during vgremove/vgreduce if we - * removed/reduced the VG, but we haven't written PV headers yet because - * vgremove stopped abruptly for whatever reason just before writing new - * PV headers with updated state, including PV extension flags (and so the - * PV_EXT_USED flag). - * - * However, in case the PV has no MDAs at all, we can't double-check - * whether the PV_EXT_USED is correct or not - if that PV is marked - * as used, it's either: - * - really used (but other disks with MDAs are missing) - * - or the error state as described above is hit - * - * User needs to overwrite the PV header directly if it's really clear - * the PV having no MDAs does not belong to any VG and at the same time - * it's still marked as being in use (pvcreate -ff will fix this). - * - * Note that the above doesn't account for the case where the PV has - * VG metadata that fails to be parsed. In that case, the PV looks - * like an in-use orphan, and is auto-repaired here. A PV with - * unparsable metadata should be kept on a special list of devices - * (like duplicate PVs) that are not auto-repaired, cannot be used - * by pvcreate, and are displayed with a special flag by 'pvs'. - */ - -#if 0 -static int _check_or_repair_orphan_pv_ext(struct physical_volume *pv, - struct lvmcache_info *info, - struct _vg_read_orphan_baton *b) -{ - uint32_t ext_version = lvmcache_ext_version(info); - uint32_t ext_flags = lvmcache_ext_flags(info); - int at_least_one_mda_used; - - /* - * Nothing to do if PV header extension < 2: - * - version 0 is PV header without any extensions, - * - version 1 has bootloader area support only and - * we're not checking anything for that one here. - */ - if (ext_version < 2) { - b->consistent = 1; - return 1; - } - - if (ext_flags & PV_EXT_USED) { - if (lvmcache_mda_count(info)) { - at_least_one_mda_used = 0; - lvmcache_foreach_mda(info, _check_mda_in_use, &at_least_one_mda_used); - - /* - * We've found a PV that is marked as used with PV_EXT_USED flag - * and it's orphan at the same time while it contains MDAs. - * This is incorrect state and it needs to be fixed. - * The PV_EXT_USED flag needs to be dropped! - */ - if (b->repair) { - if (at_least_one_mda_used) { - log_warn("WARNING: Repairing flag incorrectly marking " - "Physical Volume %s as used.", pv_dev_name(pv)); - - /* pv_write will set correct ext_flags */ - if (!pv_write(b->cmd, pv, 0)) { - b->consistent = 0; - log_error("Failed to repair physical volume \"%s\".", - pv_dev_name(pv)); - return 0; - } - } - b->consistent = 1; - } else if (at_least_one_mda_used) { - /* mark as inconsistent only if there's at least 1 MDA used */ - b->consistent = 0; - } - } - } - - return 1; -} -#endif - static int _vg_read_orphan_pv(struct lvmcache_info *info, void *baton) { struct _vg_read_orphan_baton *b = baton; @@ -3496,42 +3324,6 @@ struct volume_group *vg_read_orphans(struct cmd_context *cmd, const char *orphan return vg; } -#if 0 -static int _update_pv_list(struct dm_pool *pvmem, struct dm_list *all_pvs, struct volume_group *vg) -{ - struct pv_list *pvl, *pvl2; - - dm_list_iterate_items(pvl, &vg->pvs) { - dm_list_iterate_items(pvl2, all_pvs) { - if (pvl->pv->dev == pvl2->pv->dev) - goto next_pv; - } - - /* - * PV is not on list so add it. - */ - if (!(pvl2 = _copy_pvl(pvmem, pvl))) { - log_error("pv_list allocation for '%s' failed", - pv_dev_name(pvl->pv)); - return 0; - } - dm_list_add(all_pvs, &pvl2->list); - next_pv: - ; - } - - return 1; -} - -static void _free_pv_list(struct dm_list *all_pvs) -{ - struct pv_list *pvl; - - dm_list_iterate_items(pvl, all_pvs) - pvl->pv->fid->fmt->ops->destroy_instance(pvl->pv->fid); -} -#endif - static void _destroy_fid(struct format_instance **fid) { if (*fid) { @@ -3551,675 +3343,6 @@ int vg_missing_pv_count(const struct volume_group *vg) return ret; } -#if 0 -static int _check_reappeared_pv(struct volume_group *correct_vg, - struct physical_volume *pv, int act) -{ - struct pv_list *pvl; - int rv = 0; - - /* - * Skip these checks in case the tool is going to deal with missing - * PVs, especially since the resulting messages can be pretty - * confusing. - */ - if (correct_vg->cmd->handles_missing_pvs) - return rv; - - /* - * Skip this if there is no underlying device present for this PV. - */ - if (!pv->dev) - return rv; - - dm_list_iterate_items(pvl, &correct_vg->pvs) - if (pv->dev == pvl->pv->dev && is_missing_pv(pvl->pv)) { - if (act) - log_warn("WARNING: Missing device %s reappeared, updating " - "metadata for VG %s to version %u.", - pv_dev_name(pvl->pv), pv_vg_name(pvl->pv), - correct_vg->seqno); - if (pvl->pv->pe_alloc_count == 0) { - if (act) { - pv->status &= ~MISSING_PV; - pvl->pv->status &= ~MISSING_PV; - } - ++ rv; - } else if (act) - log_warn("WARNING: Device %s still marked missing because of allocated data " - "on it, remove volumes and consider vgreduce --removemissing.", - pv_dev_name(pvl->pv)); - } - - return rv; -} - -static int _is_foreign_vg(struct volume_group *vg) -{ - return vg->cmd->system_id && strcmp(vg->system_id, vg->cmd->system_id); -} - -static int _repair_inconsistent_vg(struct volume_group *vg, uint32_t lockd_state) -{ - unsigned saved_handles_missing_pvs = vg->cmd->handles_missing_pvs; - - if (lvmcache_found_duplicate_pvs()) { - log_debug_metadata("Skip metadata repair with duplicates."); - return 0; - } - - /* Cannot write foreign VGs, the owner will repair it. */ - if (_is_foreign_vg(vg)) { - log_verbose("Skip metadata repair for foreign VG."); - return 0; - } - - if (vg_is_shared(vg) && !(lockd_state & LDST_EX)) { - log_verbose("Skip metadata repair for shared VG without exclusive lock."); - return 0; - } - - log_warn("WARNING: Inconsistent metadata found for VG %s - updating to use version %u", vg->name, vg->seqno); - - vg->cmd->handles_missing_pvs = 1; - if (!vg_write(vg)) { - log_error("Automatic metadata correction failed"); - vg->cmd->handles_missing_pvs = saved_handles_missing_pvs; - return 0; - } - - vg->cmd->handles_missing_pvs = saved_handles_missing_pvs; - - if (!vg_commit(vg)) { - log_error("Automatic metadata correction commit failed"); - return 0; - } - - return 1; -} - -/* Caller sets consistent to 1 if it's safe for vg_read_internal to correct - * inconsistent metadata on disk (i.e. the VG write lock is held). - * This guarantees only consistent metadata is returned. - * If consistent is 0, caller must check whether consistent == 1 on return - * and take appropriate action if it isn't (e.g. abort; get write lock - * and call vg_read_internal again). - * - * If precommitted is set, use precommitted metadata if present. - * - * Either of vgname or vgid may be NULL. - * - * Note: vginfo structs must not be held or used as parameters - * across the call to this function. - */ -static struct volume_group *_vg_read(struct cmd_context *cmd, - const char *vgname, - const char *vgid, - uint32_t lockd_state, - uint32_t warn_flags, - int enable_repair, - int *mdas_consistent, - unsigned precommitted) -{ - struct format_instance *fid = NULL; - struct format_instance_ctx fic; - const struct format_type *fmt; - struct volume_group *vg, *correct_vg = NULL; - struct metadata_area *mda; - struct lvmcache_info *info; - int inconsistent = 0; - int inconsistent_vgid = 0; - int inconsistent_pvs = 0; - int inconsistent_mdas = 0; - int inconsistent_mda_count = 0; - int strip_historical_lvs = enable_repair; - unsigned use_precommitted = precommitted; - struct dm_list *pvids; - struct pv_list *pvl; - struct dm_list all_pvs; - char uuid[64] __attribute__((aligned(8))); - int skipped_rescan = 0; - struct cached_vg_fmtdata *vg_fmtdata = NULL; /* Additional format-specific data about the vg */ - unsigned use_previous_vg; - - *mdas_consistent = 1; - - if (is_orphan_vg(vgname)) { - log_very_verbose("Reading VG %s", vgname); - - if (use_precommitted) { - log_error(INTERNAL_ERROR "vg_read_internal requires vgname " - "with pre-commit."); - return NULL; - } - return vg_read_orphans(cmd, warn_flags, vgname); - } - - uuid[0] = '\0'; - if (vgid && !id_write_format((const struct id*)vgid, uuid, sizeof(uuid))) - stack; - - log_very_verbose("Reading VG %s %s", vgname ?: "", vgid ? uuid : ""); - - /* - * Rescan the devices that are associated with this vg in lvmcache. - * This repeats what was done by the command's initial label scan, - * but only the devices associated with this VG. - * - * The lvmcache info about these devs is from the initial label scan - * performed by the command before the vg lock was held. Now the VG - * lock is held, so we rescan all the info from the devs in case - * something changed between the initial scan and now that the lock - * is held. - * - * Some commands (e.g. reporting) are fine reporting data read by - * the label scan. It doesn't matter if the devs changed between - * the label scan and here, we can report what was seen in the - * scan, even though it is the old state, since we will not be - * making any modifications. If the VG was being modified during - * the scan, and caused us to see inconsistent metadata on the - * different PVs in the VG, then we do want to rescan the devs - * here to get a consistent view of the VG. Note that we don't - * know if the scan found all the PVs in the VG at this point. - * We don't know that until vg_read looks at the list of PVs in - * the metadata and compares that to the devices found by the scan. - * - * It's possible that a change made to the VG during scan was - * adding or removing a PV from the VG. In this case, the list - * of devices associated with the VG in lvmcache would change - * due to the rescan. - * - * The devs in the VG may be persistently inconsistent due to some - * previous problem. In this case, rescanning the labels here will - * find the same inconsistency. The VG repair (mistakenly done by - * vg_read below) is supposed to fix that. - * - * FIXME: sort out the usage of the global lock (which is mixed up - * with the orphan lock), and when we can tell that the global - * lock is taken prior to the label scan, and still held here, - * we can also skip the rescan in that case. - */ - if (!cmd->can_use_one_scan || lvmcache_scan_mismatch(cmd, vgname, vgid)) { - /* the skip rescan special case is for clvmd vg_read_by_vgid */ - /* FIXME: this is not a warn flag, pass this differently */ - if (warn_flags & SKIP_RESCAN) - goto find_vg; - skipped_rescan = 0; - log_debug_metadata("Rescanning devices for %s", vgname); - lvmcache_label_rescan_vg(cmd, vgname, vgid); - } else { - log_debug_metadata("Skipped rescanning devices for %s", vgname); - skipped_rescan = 1; - } - - find_vg: - - if (!(fmt = lvmcache_fmt_from_vgname(cmd, vgname, vgid, 0))) { - log_debug_metadata("Cache did not find fmt for vgname %s", vgname); - return_NULL; - } - - /* Now determine the correct vgname if none was supplied */ - if (!vgname && !(vgname = lvmcache_vgname_from_vgid(cmd->mem, vgid))) { - log_debug_metadata("Cache did not find VG name from vgid %s", uuid); - return_NULL; - } - - /* Determine the correct vgid if none was supplied */ - if (!vgid && !(vgid = lvmcache_vgid_from_vgname(cmd, vgname))) { - log_debug_metadata("Cache did not find VG vgid from name %s", vgname); - return_NULL; - } - - if (use_precommitted && !(fmt->features & FMT_PRECOMMIT)) - use_precommitted = 0; - - /* - * A "format instance" is an abstraction for a VG location, - * i.e. where a VG's metadata exists on disk. - * - * An fic (format_instance_ctx) is a temporary struct used - * to create an fid (format_instance). The fid hangs around - * and is used to create a 'vg' to which it connected (vg->fid). - * - * The 'fic' describes a VG in terms of fmt/name/id. - * - * The 'fid' describes a VG in more detail than the fic, - * holding information about where to find the VG metadata. - * - * The 'vg' describes the VG in the most detail representing - * all the VG metadata. - * - * The fic and fid are set up by create_instance() to describe - * the VG location. This happens before the VG metadata is - * assembled into the more familiar struct volume_group "vg". - * - * The fid has one main purpose: to keep track of the metadata - * locations for a given VG. It does this by putting 'mda' - * structs on fid->metadata_areas_in_use, which specify where - * metadata is located on disk. It gets this information - * (metadata locations for a specific VG) from the command's - * initial label scan. The info is passed indirectly via - * lvmcache info/vginfo structs, which are created by the - * label scan and then copied into fid by create_instance(). - */ - - /* create format instance with appropriate metadata area */ - fic.type = FMT_INSTANCE_MDAS | FMT_INSTANCE_AUX_MDAS; - fic.context.vg_ref.vg_name = vgname; - fic.context.vg_ref.vg_id = vgid; - if (!(fid = fmt->ops->create_instance(fmt, &fic))) { - log_error("Failed to create format instance"); - return NULL; - } - - /* Store pvids for later so we can check if any are missing */ - if (!(pvids = lvmcache_get_pvids(cmd, vgname, vgid))) { - _destroy_fid(&fid); - return_NULL; - } - - /* - * We use the fid globally here so prevent the release_vg - * call to destroy the fid - we may want to reuse it! - */ - fid->ref_count++; - /* Ensure contents of all metadata areas match - else do recovery */ - inconsistent_mda_count=0; - dm_list_iterate_items(mda, &fid->metadata_areas_in_use) { - struct device *mda_dev = mda_get_device(mda); - - use_previous_vg = 0; - - log_debug_metadata("Reading VG %s from %s", vgname, dev_name(mda_dev)); - - if ((use_precommitted && - !(vg = mda->ops->vg_read_precommit(fid, vgname, mda, &vg_fmtdata, &use_previous_vg)) && !use_previous_vg) || - (!use_precommitted && - !(vg = mda->ops->vg_read(fid, vgname, mda, &vg_fmtdata, &use_previous_vg)) && !use_previous_vg)) { - inconsistent = 1; - vg_fmtdata = NULL; - continue; - } - - if (vg) - set_pv_devices(fid, vg); - - /* Use previous VG because checksum matches */ - if (!vg) { - vg = correct_vg; - continue; - } - - if (!correct_vg) { - correct_vg = vg; - continue; - } - - /* FIXME Also ensure contents same - checksum compare? */ - if (correct_vg->seqno != vg->seqno) { - if (cmd->metadata_read_only || skipped_rescan) - log_warn("Not repairing metadata for VG %s.", vgname); - else - inconsistent = 1; - - if (vg->seqno > correct_vg->seqno) { - release_vg(correct_vg); - correct_vg = vg; - } else { - mda->status |= MDA_INCONSISTENT; - ++inconsistent_mda_count; - } - } - - if (vg != correct_vg) { - release_vg(vg); - vg_fmtdata = NULL; - } - } - fid->ref_count--; - - /* Ensure every PV in the VG was in the cache */ - if (correct_vg) { - /* - * Update the seqno from the cache, for the benefit of - * retro-style metadata formats like LVM1. - */ - // correct_vg->seqno = seqno > correct_vg->seqno ? seqno : correct_vg->seqno; - - /* - * If the VG has PVs without mdas, or ignored mdas, they may - * still be orphans in the cache: update the cache state here, - * and update the metadata lists in the vg. - */ - if (!inconsistent && - dm_list_size(&correct_vg->pvs) > dm_list_size(pvids)) { - dm_list_iterate_items(pvl, &correct_vg->pvs) { - if (!pvl->pv->dev) { - inconsistent_pvs = 1; - break; - } - - if (str_list_match_item(pvids, pvl->pv->dev->pvid)) - continue; - - /* - * PV not marked as belonging to this VG in cache. - * Check it's an orphan without metadata area - * not ignored. - */ - if (!(info = lvmcache_info_from_pvid(pvl->pv->dev->pvid, pvl->pv->dev, 1)) || - !lvmcache_is_orphan(info)) { - inconsistent_pvs = 1; - break; - } - - if (lvmcache_mda_count(info)) { - if (!lvmcache_fid_add_mdas_pv(info, fid)) { - release_vg(correct_vg); - return_NULL; - } - - log_debug_metadata("Empty mda found for VG %s on %s.", - vgname, dev_name(pvl->pv->dev)); - -#if 0 - /* - * If we are going to do any repair we have to be using - * the latest metadata on disk, so we have to rescan devs - * if we skipped that at the start of the vg_read. We'll - * likely come back through here, but without having - * skipped_rescan. - * - * FIXME: in some cases we don't want to do this. - */ - if (skipped_rescan && cmd->can_use_one_scan) { - log_debug_metadata("Restarting read to rescan devs."); - cmd->can_use_one_scan = 0; - release_vg(correct_vg); - correct_vg = NULL; - lvmcache_del(info); - label_read(pvl->pv->dev); - goto restart_scan; - } -#endif - - if (inconsistent_mdas) - continue; - - /* - * If any newly-added mdas are in-use then their - * metadata needs updating. - */ - lvmcache_foreach_mda(info, _check_mda_in_use, - &inconsistent_mdas); - } - } - - /* If the check passed, let's update VG and recalculate pvids */ - if (!inconsistent_pvs) { - log_debug_metadata("Updating cache for PVs without mdas " - "in VG %s.", vgname); - /* - * If there is no precommitted metadata, committed metadata - * is read and stored in the cache even if use_precommitted is set - */ - lvmcache_update_vg_from_read(correct_vg, correct_vg->status & PRECOMMITTED); - - if (!(pvids = lvmcache_get_pvids(cmd, vgname, vgid))) { - release_vg(correct_vg); - return_NULL; - } - } - } - - fid->ref_count++; - if (dm_list_size(&correct_vg->pvs) != - dm_list_size(pvids) + vg_missing_pv_count(correct_vg)) { - log_debug_metadata("Cached VG %s had incorrect PV list", - vgname); - - if (prioritized_section()) - inconsistent = 1; - else { - release_vg(correct_vg); - correct_vg = NULL; - } - } else dm_list_iterate_items(pvl, &correct_vg->pvs) { - if (is_missing_pv(pvl->pv)) - continue; - if (!str_list_match_item(pvids, pvl->pv->dev->pvid)) { - log_debug_metadata("Cached VG %s had incorrect PV list", - vgname); - release_vg(correct_vg); - correct_vg = NULL; - break; - } - } - - if (correct_vg && inconsistent_mdas) { - release_vg(correct_vg); - correct_vg = NULL; - } - fid->ref_count--; - } - - dm_list_init(&all_pvs); - - /* Failed to find VG where we expected it - full scan and retry */ - if (!correct_vg) { - /* - * Free outstanding format instance that remained unassigned - * from previous step where we tried to get the "correct_vg", - * but we failed to do so (so there's a dangling fid now). - */ - _destroy_fid(&fid); - vg_fmtdata = NULL; - - inconsistent = 0; - - if (!(fmt = lvmcache_fmt_from_vgname(cmd, vgname, vgid, 0))) - return_NULL; - - if (precommitted && !(fmt->features & FMT_PRECOMMIT)) - use_precommitted = 0; - - /* create format instance with appropriate metadata area */ - fic.type = FMT_INSTANCE_MDAS | FMT_INSTANCE_AUX_MDAS; - fic.context.vg_ref.vg_name = vgname; - fic.context.vg_ref.vg_id = vgid; - if (!(fid = fmt->ops->create_instance(fmt, &fic))) { - log_error("Failed to create format instance"); - return NULL; - } - - /* - * We use the fid globally here so prevent the release_vg - * call to destroy the fid - we may want to reuse it! - */ - fid->ref_count++; - /* Ensure contents of all metadata areas match - else recover */ - inconsistent_mda_count=0; - dm_list_iterate_items(mda, &fid->metadata_areas_in_use) { - use_previous_vg = 0; - - if ((use_precommitted && - !(vg = mda->ops->vg_read_precommit(fid, vgname, mda, &vg_fmtdata, &use_previous_vg)) && !use_previous_vg) || - (!use_precommitted && - !(vg = mda->ops->vg_read(fid, vgname, mda, &vg_fmtdata, &use_previous_vg)) && !use_previous_vg)) { - inconsistent = 1; - vg_fmtdata = NULL; - continue; - } - - if (vg) - set_pv_devices(fid, vg); - - /* Use previous VG because checksum matches */ - if (!vg) { - vg = correct_vg; - continue; - } - - if (!correct_vg) { - correct_vg = vg; - if (!_update_pv_list(cmd->mem, &all_pvs, correct_vg)) { - _free_pv_list(&all_pvs); - fid->ref_count--; - release_vg(vg); - return_NULL; - } - continue; - } - - if (!id_equal(&vg->id, &correct_vg->id)) { - inconsistent = 1; - inconsistent_vgid = 1; - } - - /* FIXME Also ensure contents same - checksums same? */ - if (correct_vg->seqno != vg->seqno) { - /* Ignore inconsistent seqno if told to skip repair logic */ - if (cmd->metadata_read_only || skipped_rescan) - log_warn("Not repairing metadata for VG %s.", vgname); - else - inconsistent = 1; - - if (!_update_pv_list(cmd->mem, &all_pvs, vg)) { - _free_pv_list(&all_pvs); - fid->ref_count--; - release_vg(vg); - release_vg(correct_vg); - return_NULL; - } - if (vg->seqno > correct_vg->seqno) { - release_vg(correct_vg); - correct_vg = vg; - } else { - mda->status |= MDA_INCONSISTENT; - ++inconsistent_mda_count; - } - } - - if (vg != correct_vg) { - release_vg(vg); - vg_fmtdata = NULL; - } - } - fid->ref_count--; - - /* Give up looking */ - if (!correct_vg) { - _free_pv_list(&all_pvs); - _destroy_fid(&fid); - return_NULL; - } - } - - /* - * If there is no precommitted metadata, committed metadata - * is read and stored in the cache even if use_precommitted is set - */ - lvmcache_update_vg_from_read(correct_vg, (correct_vg->status & PRECOMMITTED)); - - if (inconsistent) { - /* FIXME Test should be if we're *using* precommitted metadata not if we were searching for it */ - if (use_precommitted) { - log_error("Inconsistent pre-commit metadata copies " - "for volume group %s", vgname); - - /* - * Check whether all of the inconsistent MDAs were on - * MISSING PVs -- in that case, we should be safe. - */ - dm_list_iterate_items(mda, &fid->metadata_areas_in_use) { - if (mda->status & MDA_INCONSISTENT) { - log_debug_metadata("Checking inconsistent MDA: %s", dev_name(mda_get_device(mda))); - dm_list_iterate_items(pvl, &correct_vg->pvs) { - if (mda_get_device(mda) == pvl->pv->dev && - (pvl->pv->status & MISSING_PV)) - --inconsistent_mda_count; - } - } - } - - if (inconsistent_mda_count < 0) - log_error(INTERNAL_ERROR "Too many inconsistent MDAs."); - - if (!inconsistent_mda_count) { - _free_pv_list(&all_pvs); - return correct_vg; - } - _free_pv_list(&all_pvs); - release_vg(correct_vg); - return NULL; - } - - if (!enable_repair) { - _free_pv_list(&all_pvs); - *mdas_consistent = 0; - return correct_vg; - } - - if (skipped_rescan) { - log_warn("Not repairing metadata for VG %s.", vgname); - _free_pv_list(&all_pvs); - release_vg(correct_vg); - return_NULL; - } - - /* Don't touch if vgids didn't match */ - if (inconsistent_vgid) { - log_warn("WARNING: Inconsistent metadata UUIDs found for volume group %s.", vgname); - _free_pv_list(&all_pvs); - *mdas_consistent = 0; - return correct_vg; - } - - /* - * If PV is marked missing but we found it, - * update metadata and remove MISSING flag - */ - dm_list_iterate_items(pvl, &all_pvs) - _check_reappeared_pv(correct_vg, pvl->pv, 1); - - if (!_repair_inconsistent_vg(correct_vg, lockd_state)) { - _free_pv_list(&all_pvs); - release_vg(correct_vg); - return NULL; - } - } - - _free_pv_list(&all_pvs); - - if (vg_missing_pv_count(correct_vg)) { - log_verbose("There are %d physical volumes missing.", - vg_missing_pv_count(correct_vg)); - vg_mark_partial_lvs(correct_vg, 1); - } - - if ((correct_vg->status & PVMOVE) && !pvmove_mode()) { - log_error("Interrupted pvmove detected in volume group %s.", - correct_vg->name); - log_print("Please restore the metadata by running vgcfgrestore."); - release_vg(correct_vg); - return NULL; - } - - if (correct_vg && enable_repair && !skipped_rescan) { - if (strip_historical_lvs && !vg_strip_outdated_historical_lvs(correct_vg)) { - release_vg(correct_vg); - return_NULL; - } - } - - if (inconsistent_pvs) - *mdas_consistent = 0; - - return correct_vg; -} -#endif - #define DEV_LIST_DELIM ", " static int _check_devs_used_correspond_with_lv(struct dm_pool *mem, struct dm_list *list, struct logical_volume *lv) @@ -4355,68 +3478,6 @@ static int _check_devs_used_correspond_with_vg(struct volume_group *vg) return 1; } -#if 0 -struct volume_group *vg_read_internal(struct cmd_context *cmd, - const char *vgname, const char *vgid, - uint32_t lockd_state, uint32_t warn_flags, - int enable_repair, - int *mdas_consistent) -{ - struct volume_group *vg; - struct lv_list *lvl; - - if (!(vg = _vg_read(cmd, vgname, vgid, lockd_state, - warn_flags, enable_repair, mdas_consistent, 0))) - goto_out; - - if (!check_pv_dev_sizes(vg)) - log_warn("One or more devices used as PVs in VG %s " - "have changed sizes.", vg->name); - - if (!check_pv_segments(vg)) { - log_error(INTERNAL_ERROR "PV segments corrupted in %s.", - vg->name); - release_vg(vg); - vg = NULL; - goto out; - } - - dm_list_iterate_items(lvl, &vg->lvs) { - if (!check_lv_segments(lvl->lv, 0)) { - log_error(INTERNAL_ERROR "LV segments corrupted in %s.", - lvl->lv->name); - release_vg(vg); - vg = NULL; - goto out; - } - } - - dm_list_iterate_items(lvl, &vg->lvs) { - /* - * Checks that cross-reference other LVs. - */ - if (!check_lv_segments(lvl->lv, 1)) { - log_error(INTERNAL_ERROR "LV segments corrupted in %s.", - lvl->lv->name); - release_vg(vg); - vg = NULL; - goto out; - } - } - - (void) _check_devs_used_correspond_with_vg(vg); -out: - if (!*mdas_consistent && (warn_flags & WARN_INCONSISTENT)) { - if (is_orphan_vg(vgname)) - log_warn("WARNING: Found inconsistent standalone Physical Volumes."); - else - log_warn("WARNING: Volume Group %s is not consistent.", vgname); - } - - return vg; -} -#endif - void free_pv_fid(struct physical_volume *pv) { if (!pv) @@ -4727,50 +3788,6 @@ int vg_check_status(const struct volume_group *vg, uint64_t status) return !vg_bad_status_bits(vg, status); } -#if 0 -/* - * VG is left unlocked on failure - */ -static struct volume_group *_recover_vg(struct cmd_context *cmd, - const char *vg_name, const char *vgid, - int is_shared, uint32_t lockd_state) -{ - int mdas_consistent = 0; - struct volume_group *vg; - uint32_t state = 0; - - unlock_vg(cmd, NULL, vg_name); - - if (!lock_vol(cmd, vg_name, LCK_VG_WRITE, NULL)) - return_NULL; - - /* - * Convert vg lock in lvmlockd from sh to ex. - */ - if (is_shared && !(lockd_state & LDST_FAIL) && !(lockd_state & LDST_EX)) { - log_debug("Upgrade lvmlockd lock to repair vg %s.", vg_name); - if (!lockd_vg(cmd, vg_name, "ex", 0, &state)) { - log_warn("Skip repair for shared VG without exclusive lock."); - return NULL; - } - lockd_state |= LDST_EX; - } - - if (!(vg = vg_read_internal(cmd, vg_name, vgid, lockd_state, 0, 1, &mdas_consistent))) { - unlock_vg(cmd, NULL, vg_name); - return_NULL; - } - - if (!mdas_consistent) { - release_vg(vg); - unlock_vg(cmd, NULL, vg_name); - return_NULL; - } - - return (struct volume_group *)vg; -} -#endif - static int _allow_extra_system_id(struct cmd_context *cmd, const char *system_id) { const struct dm_config_node *cn; @@ -4945,212 +3962,6 @@ static int _access_vg_systemid(struct cmd_context *cmd, struct volume_group *vg) return 0; } -#if 0 -/* - * FIXME: move vg_bad_status_bits() checks in here. - */ -static int _vg_access_permitted(struct cmd_context *cmd, struct volume_group *vg, - uint32_t lockd_state, uint32_t *failure) -{ - if (!is_real_vg(vg->name)) { - return 1; - } - - if (!_access_vg_clustered(cmd, vg)) { - *failure |= FAILED_CLUSTERED; - return 0; - } - - if (!_access_vg_lock_type(cmd, vg, lockd_state, failure)) { - /* Either FAILED_LOCK_TYPE or FAILED_LOCK_MODE were set. */ - return 0; - } - - if (!_access_vg_systemid(cmd, vg)) { - *failure |= FAILED_SYSTEMID; - return 0; - } - - return 1; -} - -/* - * Consolidated locking, reading, and status flag checking. - * - * If the metadata is inconsistent, setting READ_ALLOW_INCONSISTENT in - * read_flags will return it with FAILED_INCONSISTENT set instead of - * giving you nothing. - * - * Use vg_read_error(vg) to determine the result. Nonzero means there were - * problems reading the volume group. - * Zero value means that the VG is open and appropriate locks are held. - */ -static struct volume_group *_vg_lock_and_read(struct cmd_context *cmd, const char *vg_name, - const char *vgid, - uint32_t lock_flags, - uint64_t status_flags, - uint32_t read_flags, - uint32_t lockd_state) -{ - struct volume_group *vg = NULL; - uint32_t failure = 0; - uint32_t warn_flags = 0; - int mdas_consistent = 1; - int enable_repair = 1; - int is_shared = 0; - - if ((read_flags & READ_ALLOW_INCONSISTENT) || (lock_flags != LCK_VG_WRITE)) { - enable_repair = 0; - warn_flags |= WARN_INCONSISTENT; - } - - if (!validate_name(vg_name) && !is_orphan_vg(vg_name)) { - log_error("Volume group name \"%s\" has invalid characters.", - vg_name); - return NULL; - } - - if (!lock_vol(cmd, vg_name, lock_flags, NULL)) { - log_error("Can't get lock for %s", vg_name); - return _vg_make_handle(cmd, vg, FAILED_LOCKING); - } - - if (is_orphan_vg(vg_name)) - status_flags &= ~LVM_WRITE; - - if (!(vg = vg_read_internal(cmd, vg_name, vgid, lockd_state, warn_flags, enable_repair, &mdas_consistent))) { - if (!(read_flags & READ_OK_NOTFOUND)) - log_error("Volume group \"%s\" not found", vg_name); - failure |= FAILED_NOTFOUND; - goto bad; - } - - if (!_vg_access_permitted(cmd, vg, lockd_state, &failure)) - goto bad; - - /* - * If we called vg_read_internal above without repair enabled, - * and the read found inconsistent mdas, then then get a write/ex - * lock and call it again with repair enabled so it will fix - * the inconsistent mdas. - * - * FIXME: factor vg repair out of vg_read. The vg_read caller - * should get an error about the vg have problems and then call - * a repair-specific function if it wants to. (NB there are - * other kinds of repairs hidden in _vg_read that should be - * pulled out in addition to _recover_vg). - */ - if (!mdas_consistent && !enable_repair) { - is_shared = vg_is_shared(vg); - release_vg(vg); - - if (!(vg = _recover_vg(cmd, vg_name, vgid, is_shared, lockd_state))) { - if (is_orphan_vg(vg_name)) - log_error("Recovery of standalone physical volumes failed."); - else - log_error("Recovery of volume group \"%s\" failed.", vg_name); - failure |= FAILED_RECOVERY; - goto bad_no_unlock; - } - } - - /* - * Check that the tool can handle tricky cases -- missing PVs and - * unknown segment types. - */ - - if (!cmd->handles_missing_pvs && vg_missing_pv_count(vg) && - lock_flags == LCK_VG_WRITE) { - log_error("Cannot change VG %s while PVs are missing.", vg->name); - log_error("Consider vgreduce --removemissing."); - failure |= FAILED_INCONSISTENT; /* FIXME new failure code here? */ - goto bad; - } - - if (!cmd->handles_unknown_segments && vg_has_unknown_segments(vg) && - lock_flags == LCK_VG_WRITE) { - log_error("Cannot change VG %s with unknown segments in it!", - vg->name); - failure |= FAILED_INCONSISTENT; /* FIXME new failure code here? */ - goto bad; - } - - failure |= vg_bad_status_bits(vg, status_flags); - if (failure) - goto_bad; - - if (!(vg = _vg_make_handle(cmd, vg, failure)) || vg_read_error(vg)) - unlock_vg(cmd, vg, vg_name); - - return vg; - -bad: - unlock_vg(cmd, vg, vg_name); - -bad_no_unlock: - return _vg_make_handle(cmd, vg, failure); -} - -/* - * vg_read: High-level volume group metadata read function. - * - * vg_read_error() must be used on any handle returned to check for errors. - * - * - metadata inconsistent and automatic correction failed: FAILED_INCONSISTENT - * - VG is read-only: FAILED_READ_ONLY - * - VG is EXPORTED, unless flags has READ_ALLOW_EXPORTED: FAILED_EXPORTED - * - VG is not RESIZEABLE: FAILED_RESIZEABLE - * - locking failed: FAILED_LOCKING - * - * On failures, all locks are released, unless one of the following applies: - * - vgname_is_locked(lock_name) is true - * FIXME: remove the above 2 conditions if possible and make an error always - * release the lock. - * - * Volume groups are opened read-only unless flags contains READ_FOR_UPDATE. - * - * Checking for VG existence: - * - * FIXME: We want vg_read to attempt automatic recovery after acquiring a - * temporary write lock: if that fails, we bail out as usual, with failed & - * FAILED_INCONSISTENT. If it works, we are good to go. Code that's been in - * toollib just set lock_flags to LCK_VG_WRITE and called vg_read_internal with - * *consistent = 1. - */ -struct volume_group *vg_read(struct cmd_context *cmd, const char *vg_name, - const char *vgid, uint32_t read_flags, uint32_t lockd_state) -{ - uint64_t status_flags = UINT64_C(0); - uint32_t lock_flags = LCK_VG_READ; - - if (read_flags & READ_FOR_UPDATE) { - status_flags |= EXPORTED_VG | LVM_WRITE; - lock_flags = LCK_VG_WRITE; - } - - if (read_flags & READ_ALLOW_EXPORTED) - status_flags &= ~EXPORTED_VG; - - return _vg_lock_and_read(cmd, vg_name, vgid, lock_flags, status_flags, read_flags, lockd_state); -} - -/* - * A high-level volume group metadata reading function. Open a volume group for - * later update (this means the user code can change the metadata and later - * request the new metadata to be written and committed). - */ -struct volume_group *vg_read_for_update(struct cmd_context *cmd, const char *vg_name, - const char *vgid, uint32_t read_flags, uint32_t lockd_state) -{ - struct volume_group *vg = vg_read(cmd, vg_name, vgid, read_flags | READ_FOR_UPDATE, lockd_state); - - if (!vg || vg_read_error(vg)) - stack; - - return vg; -} -#endif - /* * Test the validity of a VG handle returned by vg_read() or vg_read_for_update(). */ From d3636ff8322f17bac66ac6fe79e2c0710d645260 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Fri, 12 Apr 2019 10:55:19 -0500 Subject: [PATCH 18/22] tests: add missing-pv missing-pv-unused --- test/shell/missing-pv-unused.sh | 86 ++++++++++++++++++ test/shell/missing-pv.sh | 152 ++++++++++++++++++++++++++++++++ 2 files changed, 238 insertions(+) create mode 100644 test/shell/missing-pv-unused.sh create mode 100644 test/shell/missing-pv.sh diff --git a/test/shell/missing-pv-unused.sh b/test/shell/missing-pv-unused.sh new file mode 100644 index 000000000..2265a5bd9 --- /dev/null +++ b/test/shell/missing-pv-unused.sh @@ -0,0 +1,86 @@ +#!/usr/bin/env bash + +# Copyright (C) 2008-2013,2018 Red Hat, Inc. All rights reserved. +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions +# of the GNU General Public License v.2. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +. lib/inittest + +aux prepare_devs 3 +get_devs + +vgcreate $SHARED $vg "$dev1" "$dev2" "$dev3" + +lvcreate -n $lv1 -L8M $vg "$dev2" +lvcreate -n $lv2 -L8M $vg "$dev3" +lvcreate -n $lv3 -L8M $vg "$dev2" +lvcreate -n $lv4 -L8M $vg "$dev3" + +vgchange -an $vg + +pvs +vgs +lvs -a -o+devices + +# Fail device that is not used by any LVs. +aux disable_dev "$dev1" + +pvs +vgs +lvs -a -o+devices + +# Cannot do normal activation of LVs not using failed PV. +lvchange -ay $vg/$lv1 +lvchange -ay $vg/$lv2 + +vgchange -an $vg + +# Check that MISSING flag is not set in ondisk metadata. +pvck --dump metadata "$dev2" > meta +not grep MISSING meta +rm meta + +pvs +vgs +lvs -a -o+devices + +# lvremove is one of the few commands that is allowed to run +# when PVs are missing. The vg_write from this command sets +# the MISSING flag on the PV in the ondisk metadata. +# (this could be changed, the MISSING flag wouldn't need +# to be set in the first place since the PV isn't used.) +lvremove $vg/$lv1 + +# Check that MISSING flag is set in ondisk metadata. +pvck --dump metadata "$dev2" > meta +grep MISSING meta +rm meta + +# with MISSING flag in metadata, restrictions apply +not lvcreate -l1 $vg + +aux enable_dev "$dev1" + +# No LVs are using the PV with MISSING flag, so no restrictions +# are applied, and the vg_write here clears the MISSING flag on disk. +lvcreate -l1 $vg + +# Check that MISSING flag is not set in ondisk metadata. +pvck --dump metadata "$dev2" > meta +not grep MISSING meta +rm meta + + +pvs +vgs +lvs -a -o+devices + +vgchange -an $vg +vgremove -ff $vg + diff --git a/test/shell/missing-pv.sh b/test/shell/missing-pv.sh new file mode 100644 index 000000000..74cc43b0e --- /dev/null +++ b/test/shell/missing-pv.sh @@ -0,0 +1,152 @@ +#!/usr/bin/env bash + +# Copyright (C) 2008-2013,2018 Red Hat, Inc. All rights reserved. +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions +# of the GNU General Public License v.2. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +. lib/inittest + +aux prepare_devs 3 +get_devs + +vgcreate $SHARED $vg "$dev1" "$dev2" "$dev3" + +lvcreate -n $lv1 -L8M --type mirror -m 1 $vg +lvcreate -n $lv2 -L8M --type mirror -m 1 $vg + +vgchange -an $vg + +pvs +vgs +lvs -a -o+devices + +# Fail one leg of each mirror LV. +aux disable_dev "$dev1" + +pvs +vgs +lvs -a -o+devices + +# Cannot do normal activate of either LV with a failed leg. +not lvchange -ay $vg/$lv1 +not lvchange -ay $vg/$lv2 + +# Can activate with partial option. +lvchange -ay --activationmode partial $vg/$lv1 +lvchange -ay --activationmode partial $vg/$lv2 + +pvs +vgs +lvs -a -o+devices + +# Repair lv1 so it no longer uses failed dev. +lvconvert --repair --yes $vg/$lv1 + +# Check that MISSING flag is set in ondisk metadata, +# it should have been written by the lvconvert since the +# missing PV is still used by lv2. +pvck --dump metadata "$dev2" > meta +grep MISSING meta +rm meta + +pvs +vgs +lvs -a -o+devices + +# Verify normal activation is possible of lv1 since it's +# not using any failed devs, and partial activation is +# required for lv2 since it's still using the failed dev. +vgchange -an $vg +lvchange -ay $vg/$lv1 +not lvchange -ay $vg/$lv2 +vgchange -an $vg + +aux enable_dev "$dev1" + +pvs +vgs +lvs -a -o+devices + +# TODO: check that lv2 has partial flag, lv1 does not +# (there's no partial reporting option, only attr p.) + +# Check that MISSING flag is still set in ondisk +# metadata since the previously missing dev is still +# used by lv2. +pvck --dump metadata "$dev2" > meta +grep MISSING meta +rm meta + + +# The missing pv restrictions still apply even after +# the dev has reappeared since it has the MISSING flag. +not lvchange -ay $vg/$lv2 +not lvcreate -l1 $vg + +# Update old metadata on the previously missing PV. +# This should not clear the MISSING flag because the +# previously missing PV is still used by lv2. +# This would be done by any command that writes +# metadata, e.g. lvcreate, but since we are in a +# state with a missing pv, most commands that write +# metadata are restricted, so use a command that +# explicitly writes/fixes metadata. +vgck --updatemetadata $vg + +pvs +vgs +lvs -a -o+devices + +# Check that MISSING flag is still set in ondisk +# metadata since the previously missing dev is still +# used by lv2. +pvck --dump metadata "$dev2" > meta +grep MISSING meta +rm meta + + +# The missing pv restrictions still apply since it +# has the MISSING flag. +not lvchange -ay $vg/$lv2 +not lvcreate -l1 $vg + +lvchange -ay --activationmode partial $vg/$lv2 + +# After repair, no more LVs will be using the previously +# missing PV. +lvconvert --repair --yes $vg/$lv2 + +pvs +vgs +lvs -a -o+devices + +vgchange -an $vg + +# The next write of the metadata will clear the MISSING +# flag in ondisk metadata because the previously missing +# PV is no longer used by any LVs. + +# Run a command to write ondisk metadata, which should clear +# the MISSING flag, could also use vgck --updatemetadata vg. +lvcreate -l1 $vg + +# Check that the MISSING flag is no longer set +# in the ondisk metadata. +pvck --dump metadata "$dev2" > meta +not grep MISSING meta +rm meta + + +pvs +vgs +lvs -a -o+devices + +vgchange -an $vg +vgremove -ff $vg + From 9156640b6083690a7341d6073c491272f1187282 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Fri, 24 May 2019 15:18:18 -0500 Subject: [PATCH 19/22] tests: add metadata-old.sh --- test/shell/metadata-old.sh | 177 +++++++++++++++++++++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 test/shell/metadata-old.sh diff --git a/test/shell/metadata-old.sh b/test/shell/metadata-old.sh new file mode 100644 index 000000000..53133bc6a --- /dev/null +++ b/test/shell/metadata-old.sh @@ -0,0 +1,177 @@ +#!/usr/bin/env bash + +# Copyright (C) 2008-2013,2018 Red Hat, Inc. All rights reserved. +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions +# of the GNU General Public License v.2. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +. lib/inittest + +aux prepare_devs 3 +get_devs + +# +# Test "old metadata" repair which occurs when the VG is written +# and one of the PVs in the VG does not get written to, and then +# the PV reappears with the old metadata. This can happen if +# a command is killed or crashes after writing new metadata to +# only some of the PVs in the VG, or if a PV is temporarily +# inaccessible while a VG is written. +# + +vgcreate $SHARED $vg "$dev1" "$dev2" "$dev3" + +# +# Test that vgck --updatemetadata will update old metadata. +# + +lvcreate -n $lv1 -l1 -an $vg "$dev1" +lvcreate -n $lv2 -l1 -an $vg "$dev1" + +aux disable_dev "$dev2" + +pvs +pvs "$dev1" +not pvs "$dev2" +pvs "$dev3" +lvs $vg/$lv1 +lvs $vg/$lv2 + +lvremove $vg/$lv2 + +aux enable_dev "$dev2" + +pvs 2>&1 | tee out +grep "ignoring metadata seqno" out +pvs "$dev1" +pvs "$dev2" +pvs "$dev3" + +lvs $vg/$lv1 +not lvs $vg/$lv2 + +# fixes the old metadata on dev1 +vgck --updatemetadata $vg + +pvs 2>&1 | tee out +not grep "ignoring metadata seqno" out +pvs "$dev1" +pvs "$dev2" +pvs "$dev3" + +lvs $vg/$lv1 +not lvs $vg/$lv2 + +# +# Test that any writing command will also update the +# old metadata. +# + +lvcreate -n $lv2 -l1 -an $vg "$dev1" + +aux disable_dev "$dev2" + +pvs +pvs "$dev1" +not pvs "$dev2" +pvs "$dev3" +lvs $vg/$lv1 +lvs $vg/$lv2 + +lvremove $vg/$lv2 + +aux enable_dev "$dev2" + +pvs 2>&1 | tee out +grep "ignoring metadata seqno" out +pvs "$dev1" +pvs "$dev2" +pvs "$dev3" + +lvs $vg/$lv1 +not lvs $vg/$lv2 + +# fixes the old metadata on dev1 +lvcreate -n $lv3 -l1 -an $vg + +pvs 2>&1 | tee out +not grep "ignoring metadata seqno" out +pvs "$dev1" +pvs "$dev2" +pvs "$dev3" + +lvs $vg/$lv1 +not lvs $vg/$lv2 + +vgremove -ff $vg + +# +# First two PVs with one mda, where both have old metadata. +# Third PV with two mdas, where the first mda has old +# metadata, and the second mda has current metadata. +# + +dd if=/dev/zero of="$dev1" || true +dd if=/dev/zero of="$dev2" || true +dd if=/dev/zero of="$dev3" || true + +pvcreate "$dev1" +pvcreate "$dev2" +pvcreate --pvmetadatacopies 2 "$dev3" + +vgcreate $SHARED $vg "$dev1" "$dev2" "$dev3" + +lvcreate -n $lv1 -l1 -an $vg "$dev3" +lvcreate -n $lv2 -l1 -an $vg "$dev3" + +# Save the metadata at this point... +dd if="$dev1" of=meta1 bs=4k count=4 +dd if="$dev2" of=meta2 bs=4k count=4 +dd if="$dev3" of=meta3 bs=4k count=4 + +# and now change metadata so the saved copies are old +lvcreate -n $lv3 -l1 -an $vg "$dev3" + +# Copy the saved metadata back to the three +# devs first mda, leaving the second mda on +# dev3 as the only latest copy of the metadata. + +dd if=meta1 of="$dev1" +dd if=meta2 of="$dev2" +dd if=meta3 of="$dev3" + +pvs 2>&1 | tee out +grep "ignoring metadata seqno" out +pvs "$dev1" +pvs "$dev2" +pvs "$dev3" + +# We still see the three LVs since we are using +# the latest copy of metadata from dev3:mda2 + +lvs $vg/$lv1 +lvs $vg/$lv2 +lvs $vg/$lv3 + +# This command which writes the VG should update +# all of the old copies. +lvcreate -n $lv4 -l1 -an $vg + +pvs 2>&1 | tee out +not grep "ignoring metadata seqno" out +pvs "$dev1" +pvs "$dev2" +pvs "$dev3" + +lvs $vg/$lv1 +lvs $vg/$lv2 +lvs $vg/$lv3 +lvs $vg/$lv4 + +vgchange -an $vg +vgremove -ff $vg From 4fa1638301672c94c22b7ceacc586316af897f29 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Fri, 24 May 2019 15:26:47 -0500 Subject: [PATCH 20/22] tests: add outdated-pv.sh --- test/shell/outdated-pv.sh | 66 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 test/shell/outdated-pv.sh diff --git a/test/shell/outdated-pv.sh b/test/shell/outdated-pv.sh new file mode 100644 index 000000000..5c9b1b2f3 --- /dev/null +++ b/test/shell/outdated-pv.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash + +# Copyright (C) 2008-2013,2018 Red Hat, Inc. All rights reserved. +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions +# of the GNU General Public License v.2. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +. lib/inittest + +aux prepare_devs 3 +get_devs + +# +# Test handling of "outdated PV" which occurs when a PV goes missing +# from a VG, and while it's missing the PV is removed from the VG. +# Then the PV reappears with the old VG metadata that shows it is a +# member. That outdated metadata needs to be cleared. +# + +vgcreate $SHARED $vg "$dev1" "$dev2" "$dev3" + +lvcreate -n $lv1 -l1 -an $vg "$dev1" +lvcreate -n $lv2 -l1 -an $vg "$dev1" + +aux disable_dev "$dev2" + +vgreduce --removemissing $vg + +pvs + +aux enable_dev "$dev2" + +pvs 2>&1 | tee out +grep "outdated" out + +not pvs "$dev2" + +# The VG can still be used with the outdated PV around +lvcreate -n $lv3 -l1 $vg +lvchange -ay $vg +lvs $vg +lvchange -an $vg + +# Clears the outdated PV +vgck --updatemetadata $vg + +pvs 2>&1 | tee out +not grep "outdated" out + +# The PV is no longer in the VG +pvs "$dev2" | tee out +not grep "$vg" out + +# The cleared PV can be added back to the VG +vgextend $vg "$dev2" + +pvs "$dev2" | tee out +grep "$vg" out + +vgremove -ff $vg + From 878741502a28c3e91d8fdff76ebcc83a68b04f91 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Fri, 24 May 2019 15:58:05 -0500 Subject: [PATCH 21/22] tests: add metadata-bad-text.sh --- test/shell/metadata-bad-text.sh | 236 ++++++++++++++++++++++++++++++++ 1 file changed, 236 insertions(+) create mode 100644 test/shell/metadata-bad-text.sh diff --git a/test/shell/metadata-bad-text.sh b/test/shell/metadata-bad-text.sh new file mode 100644 index 000000000..4c59bc9f3 --- /dev/null +++ b/test/shell/metadata-bad-text.sh @@ -0,0 +1,236 @@ +#!/usr/bin/env bash + +# Copyright (C) 2008-2013,2018 Red Hat, Inc. All rights reserved. +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions +# of the GNU General Public License v.2. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +. lib/inittest + +aux prepare_devs 3 +get_devs + +dd if=/dev/zero of="$dev1" || true +dd if=/dev/zero of="$dev2" || true +dd if=/dev/zero of="$dev3" || true + +vgcreate $SHARED $vg "$dev1" "$dev2" "$dev3" + +pvs + +dd if="$dev1" of=meta1 bs=4k count=2 + +sed 's/flags =/flagx =/' meta1 > meta1.bad + +dd if=meta1.bad of="$dev1" + +pvs 2>&1 | tee out +grep "bad metadata text" out + +pvs "$dev1" +pvs "$dev2" +pvs "$dev3" + +# bad metadata in one mda doesn't prevent using +# the VG since other mdas are fine and usable +lvcreate -l1 $vg + + +vgck --updatemetadata $vg + +pvs 2>&1 | tee out +not grep "bad metadata text" out + +pvs "$dev1" +pvs "$dev2" +pvs "$dev3" + +vgchange -an $vg +vgremove -ff $vg + + +# +# Same test as above, but corrupt metadata text +# on two of the three PVs, leaving one good +# copy of the metadata. +# + +dd if=/dev/zero of="$dev1" || true +dd if=/dev/zero of="$dev2" || true +dd if=/dev/zero of="$dev3" || true + +vgcreate $SHARED $vg "$dev1" "$dev2" "$dev3" + +pvs + +dd if="$dev1" of=meta1 bs=4k count=2 +dd if="$dev2" of=meta2 bs=4k count=2 + +sed 's/READ/RRRR/' meta1 > meta1.bad +sed 's/seqno =/sss =/' meta2 > meta2.bad + +dd if=meta1.bad of="$dev1" +dd if=meta2.bad of="$dev2" + +pvs 2>&1 | tee out +grep "bad metadata text" out > out2 +grep "$dev1" out2 +grep "$dev2" out2 + +pvs "$dev1" +pvs "$dev2" +pvs "$dev3" + +# bad metadata in one mda doesn't prevent using +# the VG since other mdas are fine +lvcreate -l1 $vg + + +vgck --updatemetadata $vg + +pvs 2>&1 | tee out +not grep "bad metadata text" out + +pvs "$dev1" +pvs "$dev2" +pvs "$dev3" + +vgchange -an $vg +vgremove -ff $vg + +# +# Three PVs where two have one mda, and the third +# has two mdas. The first mda is corrupted on all +# thee PVs, but the second mda on the third PV +# makes the VG usable. +# + +dd if=/dev/zero of="$dev1" || true +dd if=/dev/zero of="$dev2" || true +dd if=/dev/zero of="$dev3" || true + +pvcreate "$dev1" +pvcreate "$dev2" +pvcreate --pvmetadatacopies 2 "$dev3" + +vgcreate $SHARED $vg "$dev1" "$dev2" "$dev3" + +pvs + +dd if="$dev1" of=meta1 bs=4k count=2 +dd if="$dev2" of=meta2 bs=4k count=2 +dd if="$dev3" of=meta3 bs=4k count=2 + +sed 's/READ/RRRR/' meta1 > meta1.bad +sed 's/seqno =/sss =/' meta2 > meta2.bad +sed 's/id =/id/' meta3 > meta3.bad + +dd if=meta1.bad of="$dev1" +dd if=meta2.bad of="$dev2" +dd if=meta3.bad of="$dev3" + +pvs 2>&1 | tee out +grep "bad metadata text" out > out2 +grep "$dev1" out2 +grep "$dev2" out2 +grep "$dev3" out2 + +pvs "$dev1" +pvs "$dev2" +pvs "$dev3" + +# bad metadata in some mdas doesn't prevent using +# the VG if there's a good mda found +lvcreate -l1 $vg + + +vgck --updatemetadata $vg + +pvs 2>&1 | tee out +not grep "bad metadata text" out + +pvs "$dev1" +pvs "$dev2" +pvs "$dev3" + +vgchange -an $vg +vgremove -ff $vg + +# +# Test that vgck --updatemetadata will update old metadata +# and repair bad metadata text at the same time from different +# devices. +# + +dd if=/dev/zero of="$dev1" || true +dd if=/dev/zero of="$dev2" || true +dd if=/dev/zero of="$dev3" || true + +pvcreate "$dev1" +pvcreate "$dev2" +pvcreate --pvmetadatacopies 2 "$dev3" + +vgcreate $SHARED $vg "$dev1" "$dev2" "$dev3" + +# Put bad metadata onto dev1 +dd if="$dev1" of=meta1 bs=4k count=2 +sed 's/READ/RRRR/' meta1 > meta1.bad +dd if=meta1.bad of="$dev1" + +pvs 2>&1 | tee out +grep "bad metadata text" out > out2 +grep "$dev1" out2 + +# We can still use the VG with other available +# mdas, skipping the bad mda. + +lvcreate -n $lv1 -l1 -an $vg "$dev1" +lvcreate -n $lv2 -l1 -an $vg "$dev1" + +# Put old metadata onto dev2 by updating +# the VG while dev2 is disabled. +aux disable_dev "$dev2" + +pvs +pvs "$dev1" +not pvs "$dev2" +pvs "$dev3" +lvs $vg/$lv1 +lvs $vg/$lv2 + +lvremove $vg/$lv2 + +aux enable_dev "$dev2" + +# Both old and bad metadata are reported. +pvs 2>&1 | tee out +grep "ignoring metadata seqno" out +grep "bad metadata text" out +pvs "$dev1" +pvs "$dev2" +pvs "$dev3" + +lvs $vg/$lv1 +not lvs $vg/$lv2 + +# fixes the bad metadata on dev1, and +# fixes the old metadata on dev2. +vgck --updatemetadata $vg + +pvs 2>&1 | tee out +not grep "ignoring metadata seqno" out +not grep "bad metadata text" out +pvs "$dev1" +pvs "$dev2" +pvs "$dev3" + +lvs $vg/$lv1 + +vgchange -an $vg +vgremove -ff $vg + From d7c1168c6ac66fec30ef21c0a59d00d0259ab126 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 28 May 2019 15:20:30 -0500 Subject: [PATCH 22/22] tests: add metadata-bad-mdaheader.sh needs xxd command --- test/shell/metadata-bad-mdaheader.sh | 80 ++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 test/shell/metadata-bad-mdaheader.sh diff --git a/test/shell/metadata-bad-mdaheader.sh b/test/shell/metadata-bad-mdaheader.sh new file mode 100644 index 000000000..cf24936db --- /dev/null +++ b/test/shell/metadata-bad-mdaheader.sh @@ -0,0 +1,80 @@ +#!/usr/bin/env bash + +# Copyright (C) 2008-2013,2018 Red Hat, Inc. All rights reserved. +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions +# of the GNU General Public License v.2. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +. lib/inittest + +aux prepare_devs 3 +get_devs + +# +# Test corrupted mda_header.version field, which also +# causes the mda_header checksum to be bad. +# +# FIXME: if a VG has only a single PV, this repair +# doesn't work since there's no good PV to get +# metadata from. A more advanced repair capability +# is needed. +# + +dd if=/dev/zero of="$dev1" || true +dd if=/dev/zero of="$dev2" || true +dd if=/dev/zero of="$dev3" || true + +vgcreate $SHARED $vg "$dev1" "$dev2" "$dev3" + +pvs + +if [ -e "/usr/bin/xxd" ]; then + +# read mda_header which is 4k from start of disk +dd if="$dev1" of=meta1 bs=4k count=1 skip=1 + +# convert binary to text +xxd meta1 > meta1.txt + +# Corrupt mda_header by changing the version field from 0100 to 0200 +sed 's/0000010:\ 304e\ 2a3e\ 0100\ 0000\ 0010\ 0000\ 0000\ 0000/0000010:\ 304e\ 2a3e\ 0200\ 0000\ 0010\ 0000\ 0000\ 0000/' meta1.txt > meta1-bad.txt + +# convert text to binary +xxd -r meta1-bad.txt > meta1-bad + +# write bad mda_header back to disk +dd if=meta1-bad of="$dev1" bs=4k seek=1 + +# pvs reports bad metadata header +pvs 2>&1 | tee out +grep "bad metadata header" out + +fi + +pvs "$dev1" +pvs "$dev2" +pvs "$dev3" + +# bad metadata in one mda doesn't prevent using +# the VG since other mdas are fine and usable +lvcreate -l1 $vg + + +vgck --updatemetadata $vg + +pvs 2>&1 | tee out +not grep "bad metadata header" out + +pvs "$dev1" +pvs "$dev2" +pvs "$dev3" + +vgchange -an $vg +vgremove -ff $vg + +