Mirror of git://sourceware.org/git/lvm2.git

Compare commits


44 Commits

Author SHA1 Message Date
David Teigland
b2ad2272b0 new vg_read in progress 2017-10-18 16:23:22 -05:00
David Teigland
7fe6cb3dab Move code that sets PV devices
Move the code that looks up a struct device for each PV
in the metadata, and makes various adjustments to the
struct vg accordingly.  This code was buried in the
metadata parsing code, where it didn't belong.

It should happen once on the final struct vg
assembled from the multiple metadata copies.
It was happening when each copy of the metadata
was parsed.  When vg_read() has been renovated,
it should go there, but for now it is pulled
up to one layer below vg_read().
2017-10-18 14:10:53 -05:00
David Teigland
b25b94a8ce vg_read: don't make device an orphan on error
If the metadata starts with an invalid name, the
code was explicitly adding the device to the orphan vg.
The device is not an orphan, so drop this, and just
return an error.
2017-10-18 14:10:45 -05:00
David Teigland
88e0ba2957 Add failed_flags to functions in reading paths
Also change some function names to make the parallels more
obvious between the label scan read path and vg_read path.

This should not change any behavior.  The added failed_flags
are not yet being used.
2017-10-18 14:10:37 -05:00
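
A minimal sketch of the failed_flags convention these reading-path changes
introduce (not yet consumed at this point in the series): callers pass a
uint64_t bitmask that the read path fills with FAILED_* bits. The
FAILED_VG_METADATA_* names appear later in this diff; the helper below and
its checks are hypothetical, not lvm code.

    /* Sketch only: read one metadata copy, recording why it failed.
     * checksum_ok()/parse_ok() are hypothetical stand-ins. */
    static int read_metadata_copy(struct device *dev, uint64_t *failed_flags)
    {
            if (!checksum_ok(dev)) {
                    *failed_flags |= FAILED_VG_METADATA_CHECKSUM;
                    return 0;
            }
            if (!parse_ok(dev)) {
                    *failed_flags |= FAILED_VG_METADATA_PARSE;
                    return 0;
            }
            return 1;
    }

    /* caller (sketch): flags accumulate and can be inspected later */
    uint64_t failed_flags = 0;
    if (!read_metadata_copy(dev, &failed_flags))
            log_debug("metadata read failed, flags 0x%llx",
                      (unsigned long long) failed_flags);
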
David Teigland
7019592396 lvmcache: add defective device for bad mda_header or metadata
If label scan finds bad data in the mda_header, or while reading
a summary of the VG metadata, add the device to the defective list.
2017-10-18 14:10:32 -05:00
David Teigland
e45bbde67b lvmcache: add defective device for bad pv header
If label scan finds bad data in the pv header of
an LVM device, add the device to the defective list.
Previously, lvm ignored the bad data and acted as if
the device was unformatted.
2017-10-18 14:10:26 -05:00
David Teigland
868ba093ab lvmcache: add defective device for bad label header
If label scan finds bad data in the label header of
an LVM device, add the device to the defective list.
Previously, lvm ignored the bad data and acted as if
the device was unformatted.
2017-10-18 14:10:21 -05:00
David Teigland
657d6de15f lvmcache: add list of defective devices
Add a new class of PV.  A "defective device" belongs to lvm,
but has labels/headers/metadata that are corrupt, invalid,
unparsable, or not understood by lvm in some way.  LVM can't
use the device in this condition, so it puts it into the
new class of defective devices, which are displayed by 'pvs'
but are otherwise not usable.
2017-10-18 14:10:15 -05:00
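
A minimal sketch of how the new class is meant to be used, based on the
lvmcache_add_defective_dev()/lvmcache_get_defective_devs() prototypes this
series adds to lvmcache.h; the surrounding error path, and the assumption
that the returned list holds struct device_list entries, are illustrative.

    /* Sketch only: a scan error path marks the dev defective instead of
     * treating it as unformatted. */
    if (!parse_pv_header(dev)) {                    /* hypothetical check */
            lvmcache_add_defective_dev(dev);        /* remember it for 'pvs' */
            return 0;
    }

    /* Sketch only: reporting the defective devices later. */
    struct dm_list defective_devs;
    struct device_list *devl;

    dm_list_init(&defective_devs);
    if (lvmcache_get_defective_devs(cmd, &defective_devs))
            dm_list_iterate_items(devl, &defective_devs)
                    log_warn("WARNING: %s has unusable LVM headers.",
                             dev_name(devl->dev));
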
David Teigland
5b8ba61d8d label_scan: use a single aio context
A new aio context was being created with io_setup(2) in each
call to rescan labels.  This is an expensive call, so keep
the aio context around to reuse in each call.
2017-10-18 14:09:12 -05:00
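
A minimal standalone sketch of the pattern with plain libaio: io_setup(2)
is called once, the context is reused for every rescan, and io_destroy(2)
is called at command exit (this series tears it down via
dev_async_context_destroy() in destroy_toolcontext()); the function names
below are illustrative.

    #include <libaio.h>
    #include <string.h>

    static io_context_t scan_aio_ctx;   /* created once, reused per rescan */

    int scan_aio_init(int max_events)
    {
            memset(&scan_aio_ctx, 0, sizeof(scan_aio_ctx));
            /* the expensive syscall happens only here, not per rescan */
            return io_setup(max_events, &scan_aio_ctx) == 0;
    }

    void scan_aio_exit(void)
    {
            io_destroy(scan_aio_ctx);
    }

    /* each label rescan only submits and reaps I/O on scan_aio_ctx with
     * io_submit()/io_getevents(); no io_setup() per call */
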
David Teigland
d00e72e82b doc: add description of disk reading 2017-10-18 14:09:12 -05:00
David Teigland
577108bd5d scanning: get async events from config setting 2017-10-18 14:09:12 -05:00
David Teigland
44591ada76 lvmetad_vg_lookup: use fid ref_count to fix unwanted free 2017-10-18 14:09:12 -05:00
David Teigland
41c479b09a pvscan: use new dev scanning code 2017-10-18 14:09:12 -05:00
David Teigland
15568b4a74 label_scan: use the new scanning for label_scan_invalid 2017-10-18 14:09:12 -05:00
David Teigland
6f9b9e3617 scanning: remove references to async reads
label_read_data structs are now used for both
sync and async reads.
2017-10-18 14:09:12 -05:00
David Teigland
a1c52e3346 config: move init of aio/scan settings 2017-10-18 14:09:12 -05:00
David Teigland
a72e58351c config: move scan settings to devices section 2017-10-18 14:09:12 -05:00
David Teigland
149b6811ae label_scan: fix label scan for independent metadata areas
When label_scan reads metadata from independent areas, not
from the devices, set a flag on the vginfo.  Use this flag
to avoid rescanning those devices in vg_read(), since the
metadata won't be found on them and rescanning would wreck
the lvmcache data set up by the label scan.
2017-10-18 14:09:12 -05:00
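
A minimal sketch of the flag described above. lvmcache_set_independent_location()
is the prototype this series adds to lvmcache.h; the vginfo field name and
the check in the vg_read() path are assumptions about how the flag is consumed.

    /* label scan path (sketch): metadata came from an independent area,
     * not from the PVs themselves */
    lvmcache_set_independent_location(vgname);

    /* vg_read() path (sketch): skip the per-VG label rescan for such VGs,
     * since nothing would be found on the devices and rescanning would
     * wreck the lvmcache state built by label scan */
    if (vginfo->independent_metadata_location)      /* assumed field */
            skip_rescan = 1;
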
David Teigland
d2d7ba739d add comment describing the role of fid/fic 2017-10-18 14:09:12 -05:00
David Teigland
312b514706 pvscan: quit if duplicates are found in label scan
Duplicate PVs are detected during label scan, and lvmetad
is disabled when they are detected.  pvscan --cache can quit
after label scan if duplicates were found rather than going
through the remaining metadata reading steps.
2017-10-18 14:09:12 -05:00
David Teigland
f233a6c8ae label_scan: get scan_size from config setting 2017-10-18 14:09:12 -05:00
David Teigland
afeca4edad label_scan: use label_read_data for synchronous scans
We can also read a large amount of data into label_read_data
synchronously, and use this data in the processing path
instead of reading each bit of data from disk separately.
2017-10-18 14:09:12 -05:00
David Teigland
61ef4f225f update configure for aio 2017-10-18 14:09:12 -05:00
David Teigland
2bf5762301 label_scan: add to vgcfgrestore
This command doesn't use process_each, so it needs to
do a label_scan itself before trying to parse metadata,
since metadata parsing wants to know which PVs are on which devices.
2017-10-18 14:09:12 -05:00
David Teigland
2fdedafa31 configure: improve libaio check 2017-10-18 14:09:12 -05:00
David Teigland
086e634923 configure: autoreconf 2017-10-18 14:09:12 -05:00
David Teigland
8003f7d51d conditional compile with AIO_SUPPORT 2017-10-18 14:09:12 -05:00
David Teigland
2989789433 label_scan: remove async/sync distinction from callers 2017-10-18 14:09:12 -05:00
David Teigland
796e0b1357 scanning: rewrite lvmetad_pvscan_vg to use new label reading
This is used to refresh the lvmetad content for a VG after
lvmlockd has invalidated the cached copy of the metadata in
lvmetad.
2017-10-18 14:09:12 -05:00
David Teigland
72f3d27d10 pvscan: use new label_scan data
'pvscan --cache' for scanning all devices now uses
label_scan_async and can reuse data like other commands.

'pvscan --cache dev' can't do a label_scan because it's
only allowed to read the single dev.  A label_read on
that single dev is added prior to reading the VG from it.
2017-10-18 14:09:12 -05:00
David Teigland
99e56c9f77 scanning: allocate label data struct from mem pool
Use separate alloc/free loops for the ld structs, which
come from the mem pool, and for the aio-related structs,
which do not.
2017-10-18 14:09:12 -05:00
David Teigland
37d8fc2836 dev-io: add layer around async io
Use the actual aio system calls only in dev-io.c
and use an obfuscation layer above that in lvm.
2017-10-18 14:09:12 -05:00
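
A minimal sketch of the layering this describes: only dev-io.c touches
libaio, and the rest of lvm calls small dev_async_* wrappers. struct
dev_async_context does appear elsewhere in this series (cmd->ac), but its
member and the wrapper names/signatures below are assumptions.

    /* dev-io.c (sketch): the only place io_prep_pread/io_submit/io_getevents
     * are called; callers elsewhere never see io_context_t directly. */
    #include <libaio.h>
    #include <stddef.h>

    struct dev_async_context {
            io_context_t aio_ctx;           /* assumed member */
    };

    int dev_async_read_submit(struct dev_async_context *ac, int fd, void *buf,
                              size_t len, long long offset, struct iocb *cb)
    {
            struct iocb *cbs[1] = { cb };

            io_prep_pread(cb, fd, buf, len, offset);
            return io_submit(ac->aio_ctx, 1, cbs) == 1;
    }

    int dev_async_read_wait(struct dev_async_context *ac, int nr,
                            struct io_event *events)
    {
            /* wait for at least one completion, up to nr */
            return io_getevents(ac->aio_ctx, 1, nr, events, NULL);
    }
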
David Teigland
3272708947 label_scan: pull out to top level
label_scan is a primary step that a command performs,
following the standard pattern for command processing.
It was being called as a side effect of a utility/helper
function, which made it less obvious and made it easy
to call without realizing it.  Pull it up to a
prominent top level in the sequence of primary steps.
label_scan is not something that should be done in
various low level places as needed.
2017-10-18 14:09:12 -05:00
David Teigland
d2c9ab4be6 vg_read: avoid another extraneous device read
when using async read data.

This is the short disk read that validates VG metadata
before reading the full metadata.  There's a nearly
identical function in the label scan path to this one
in the vg_read path.  The other version was already
adapted to use the label read data, but this one was
missed.
2017-10-18 14:09:12 -05:00
David Teigland
fc0153fdc8 vg_read: use the same async read code as label scan
This applies both to rescanning labels at the start of
vg_read(), and to passing and using that reread data through
the vg_read() path to avoid rereading the same data
from disk.
2017-10-18 14:09:12 -05:00
David Teigland
ec4c4d40a4 vg_read: improve messages and add comments
comments added where future error path handling should go
2017-10-18 14:09:12 -05:00
David Teigland
f412a846ee vg_read: new wording for functions and messages
Make the function names and messages parallel each other
on the two parallel vg reading paths (label reading and
vg_read).
2017-10-18 14:09:12 -05:00
David Teigland
977d6d5ce6 labels: move the label scan at the start of each vg_read
This moves a low level label scan to the start of vg_read.
2017-10-18 14:09:12 -05:00
David Teigland
ddd5b9a292 labels: avoid label_read when getting fmt in vg_read
When vg_read() begins, it looks up the format (fmt) for
the VG name in lvmcache, telling lvmcache_fmt_from_vgname()
to reread labels on all devices in the VG.

Avoid rereading the labels on all the devices, and trust
that lvmcache has correct information.  If the format of
the VG is not available, the calling code already
rescans labels and retries.
2017-10-18 14:09:12 -05:00
David Teigland
9466449246 labels: avoid label_read when getting device from pvid
When the low levels of vg_read() are parsing VG metadata,
they see a PVID, and try to get the device for it, calling
lvmcache_device_from_pvid().  When this function found
the dev for this PVID in lvmcache, it would issue a
full label_read() on that device and verify that the
pvid/dev mapping in lvmcache was correct.

Remove this label_read() and trust that the pvid to dev
mapping in lvmcache is correct.  If metadata changed
between the initial label scan performed by the command,
and the locked vg_read(), then other code exists to
rescan labels.

(The lvmetad case already trusted the contents of lvmcache.)
2017-10-18 14:09:12 -05:00
David Teigland
6d2ac36ba0 labels: avoid metadata area read using async read data
Copy the metadata out of the initial async read buffer
instead of performing another two synchronous reads
(first to check vgname, second to read all metadata.)
2017-10-18 14:08:35 -05:00
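
A minimal sketch of the rule these buffer-reuse commits (and the
doc/lvm-disk-reading.txt text below) describe: if the initial scan read
covered the area, copy from that buffer, otherwise fall back to a separate
read. struct label_read_data exists in this series, but the members used
here and the dev_read_bytes() stand-in are assumptions.

    /* Sketch only. */
    static int read_area(struct device *dev, struct label_read_data *ld,
                         uint64_t offset, size_t len, void *out)
    {
            if (ld && ld->buf && (offset + len <= ld->buf_len)) {
                    /* covered by the initial scan read: just copy */
                    memcpy(out, ld->buf + offset, len);
                    return 1;
            }
            /* outside the scanned range: issue a synchronous read */
            return dev_read_bytes(dev, offset, len, out);
    }
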
David Teigland
2783c6f43a labels: avoid mda_header read using async read data
Extend the initial async read buffer size to cover all
the headers/metadata that need to be read from the device
during label scan.

Copy the mda_header from this buffer instead of performing
another synchronous read for it.
2017-10-18 14:08:35 -05:00
David Teigland
a5b4badf2c labels: add async label scan 2017-10-18 14:08:35 -05:00
David Teigland
ca7b11ed71 lvmcache: simplify metadata cache
The copy of VG metadata stored in lvmcache was not being used
in general.  It pretended to be a generic VG metadata cache,
but was not being used except for clvmd activation.  There
it was used to avoid reading from disk while devices were
suspended, i.e. in resume.

This removes the code that attempted to make this look
like a generic metadata cache, and replaces it with
something narrowly targeted to what it's actually used for.

This is a way of passing the VG from suspend to resume in
clvmd.  Since in the case of clvmd one caller can't simply
pass the same VG to both suspend and resume, suspend needs
to stash the VG somewhere that resume can grab it from.
(resume doesn't want to read it from disk since devices
are suspended.)  The lvmcache vginfo struct is used as a
convenient place to stash the VG to pass it from suspend
to resume, even though it isn't related to the lvmcache
or vginfo.  These suspended_vg* vginfo fields should
not be used or touched anywhere else; they are only to
be used for passing the VG data from suspend to resume
in clvmd.  The VG data being passed between suspend and
resume is never modified, and will only exist in the
brief period between suspend and resume in clvmd.

suspend has both old (current) and new (precommitted)
copies of the VG metadata.  It stashes both of these in
the vginfo prior to suspending devices.  When vg_commit
is successful, it sets a flag in vginfo as before,
signaling the transition from old to new metadata.

resume grabs the VG stashed by suspend.  If the vg_commit
happened, it grabs the new VG, and if the vg_commit didn't
happen it grabs the old VG.  The VG is then used to resume
LVs.

This isolates clvmd-specific code and usage from the
normal lvm vg_read code, making the code simpler and
the behavior easier to verify.

Sequence of operations:

- lv_suspend() has both vg_old and vg_new
  and stashes a copy of each onto the vginfo:
  lvmcache_save_suspended_vg(vg_old);
  lvmcache_save_suspended_vg(vg_new);

- vg_commit() happens, which causes all clvmd
  instances to call lvmcache_commit_metadata(vg).
  A flag is set in the vginfo indicating the
  transition from the old to new VG:
  vginfo->suspended_vg_committed = 1;

- lv_resume() needs either vg_old or vg_new
  to use in resuming LVs.  It doesn't want to
  read the VG from disk since devices are
  suspended, so it gets the VG stashed by
  lv_suspend:
  vg = lvmcache_get_suspended_vg(vgid);

If the vg_commit did not happen, suspended_vg_committed
will not be set, and in this case, lvmcache_get_suspended_vg()
will return the old VG instead of the new VG, and it will
resume LVs based on the old metadata.
2017-10-18 14:08:35 -05:00
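
A condensed sketch of the suspend/resume hand-off described above, using
the lvmcache_save_suspended_vg()/lvmcache_get_suspended_vg() calls this
commit adds (the activation.c hunk below uses them the same way); the
control flow around them is simplified and resume_lvs_using() is a
hypothetical stand-in.

    /* clvmd suspend: stash both versions before devices are suspended */
    lvmcache_save_suspended_vg(lv->vg, 0);          /* old/current metadata */
    lvmcache_save_suspended_vg(lv_pre->vg, 1);      /* new/precommitted */

    /* vg_commit succeeds: lvmcache_commit_metadata() marks the transition
     * (vginfo->suspended_vg_committed = 1) */

    /* clvmd resume: no disk reads allowed, so take the stashed copy;
     * the new VG is returned if the commit happened, otherwise the old one */
    struct volume_group *vg = lvmcache_get_suspended_vg(vgid);
    if (vg)
            resume_lvs_using(vg);
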
58 changed files with 5458 additions and 2103 deletions

configure vendored

@@ -704,7 +704,9 @@ FSADM
ELDFLAGS
DM_LIB_PATCHLEVEL
DMEVENTD_PATH
AIO_LIBS
DL_LIBS
AIO
DEVMAPPER
DEFAULT_USE_LVMLOCKD
DEFAULT_USE_LVMPOLLD
@@ -950,6 +952,7 @@ enable_profiling
enable_testing
enable_valgrind_pool
enable_devmapper
enable_aio
enable_lvmetad
enable_lvmpolld
enable_lvmlockd_sanlock
@@ -1688,6 +1691,7 @@ Optional Features:
--enable-testing enable testing targets in the makefile
--enable-valgrind-pool enable valgrind awareness of pools
--disable-devmapper disable LVM2 device-mapper interaction
--disable-aio disable async i/o
--enable-lvmetad enable the LVM Metadata Daemon
--enable-lvmpolld enable the LVM Polling Daemon
--enable-lvmlockd-sanlock
@@ -3175,6 +3179,7 @@ case "$host_os" in
LDDEPS="$LDDEPS .export.sym"
LIB_SUFFIX=so
DEVMAPPER=yes
AIO=yes
BUILD_LVMETAD=no
BUILD_LVMPOLLD=no
LOCKDSANLOCK=no
@@ -3194,6 +3199,7 @@ case "$host_os" in
CLDNOWHOLEARCHIVE=
LIB_SUFFIX=dylib
DEVMAPPER=yes
AIO=no
ODIRECT=no
DM_IOCTLS=no
SELINUX=no
@@ -11816,6 +11822,67 @@ $as_echo "#define DEVMAPPER_SUPPORT 1" >>confdefs.h
fi
################################################################################
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to use aio" >&5
$as_echo_n "checking whether to use aio... " >&6; }
# Check whether --enable-aio was given.
if test "${enable_aio+set}" = set; then :
enableval=$enable_aio; AIO=$enableval
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $AIO" >&5
$as_echo "$AIO" >&6; }
if test "$AIO" = yes; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for io_setup in -laio" >&5
$as_echo_n "checking for io_setup in -laio... " >&6; }
if ${ac_cv_lib_aio_io_setup+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_check_lib_save_LIBS=$LIBS
LIBS="-laio $LIBS"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
/* Override any GCC internal prototype to avoid an error.
Use char because int might match the return type of a GCC
builtin and then its argument prototype would still apply. */
#ifdef __cplusplus
extern "C"
#endif
char io_setup ();
int
main ()
{
return io_setup ();
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
ac_cv_lib_aio_io_setup=yes
else
ac_cv_lib_aio_io_setup=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_aio_io_setup" >&5
$as_echo "$ac_cv_lib_aio_io_setup" >&6; }
if test "x$ac_cv_lib_aio_io_setup" = xyes; then :
$as_echo "#define AIO_SUPPORT 1" >>confdefs.h
AIO_LIBS="-laio"
AIO_SUPPORT=yes
else
AIO_LIBS=
AIO_SUPPORT=no
fi
fi
################################################################################
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build LVMetaD" >&5
$as_echo_n "checking whether to build LVMetaD... " >&6; }
@@ -15763,6 +15830,8 @@ _ACEOF


@@ -39,6 +39,7 @@ case "$host_os" in
LDDEPS="$LDDEPS .export.sym"
LIB_SUFFIX=so
DEVMAPPER=yes
AIO=yes
BUILD_LVMETAD=no
BUILD_LVMPOLLD=no
LOCKDSANLOCK=no
@@ -58,6 +59,7 @@ case "$host_os" in
CLDNOWHOLEARCHIVE=
LIB_SUFFIX=dylib
DEVMAPPER=yes
AIO=no
ODIRECT=no
DM_IOCTLS=no
SELINUX=no
@@ -1115,6 +1117,24 @@ if test "$DEVMAPPER" = yes; then
AC_DEFINE([DEVMAPPER_SUPPORT], 1, [Define to 1 to enable LVM2 device-mapper interaction.])
fi
################################################################################
dnl -- Disable aio
AC_MSG_CHECKING(whether to use aio)
AC_ARG_ENABLE(aio,
AC_HELP_STRING([--disable-aio],
[disable async i/o]),
AIO=$enableval)
AC_MSG_RESULT($AIO)
if test "$AIO" = yes; then
AC_CHECK_LIB(aio, io_setup,
[AC_DEFINE([AIO_SUPPORT], 1, [Define to 1 if aio is available.])
AIO_LIBS="-laio"
AIO_SUPPORT=yes],
[AIO_LIBS=
AIO_SUPPORT=no ])
fi
################################################################################
dnl -- Build lvmetad
AC_MSG_CHECKING(whether to build LVMetaD)
@@ -2056,9 +2076,11 @@ AC_SUBST(DEFAULT_USE_LVMETAD)
AC_SUBST(DEFAULT_USE_LVMPOLLD)
AC_SUBST(DEFAULT_USE_LVMLOCKD)
AC_SUBST(DEVMAPPER)
AC_SUBST(AIO)
AC_SUBST(DLM_CFLAGS)
AC_SUBST(DLM_LIBS)
AC_SUBST(DL_LIBS)
AC_SUBST(AIO_LIBS)
AC_SUBST(DMEVENTD_PATH)
AC_SUBST(DM_LIB_PATCHLEVEL)
AC_SUBST(ELDFLAGS)


@@ -77,6 +77,10 @@ include $(top_builddir)/make.tmpl
LIBS += $(LVMINTERNAL_LIBS) -ldevmapper $(PTHREAD_LIBS)
CFLAGS += -fno-strict-aliasing $(EXTRA_EXEC_CFLAGS)
ifeq ("@AIO@", "yes")
LIBS += $(AIO_LIBS)
endif
INSTALL_TARGETS = \
install_clvmd


@@ -838,20 +838,20 @@ static void check_config(void)
void lvm_do_backup(const char *vgname)
{
struct volume_group * vg;
int consistent = 0;
DEBUGLOG("Triggering backup of VG metadata for %s.\n", vgname);
pthread_mutex_lock(&lvm_lock);
vg = vg_read_internal(cmd, vgname, NULL /*vgid*/, WARN_PV_READ, &consistent);
vg = vg_read_internal(cmd, vgname, NULL /*vgid*/, 0, NULL, NULL, NULL);
if (vg && consistent)
if (vg)
check_current_backup(vg);
else
log_error("Error backing up metadata, can't find VG for group %s", vgname);
release_vg(vg);
if (vg)
release_vg(vg);
dm_pool_empty(cmd->mem);
pthread_mutex_unlock(&lvm_lock);

doc/lvm-disk-reading.txt Normal file

@@ -0,0 +1,246 @@
LVM disk reading
Reading disks happens in two phases. The first is a discovery phase,
which determines what's on the disks. The second is a working phase,
which does a particular job for the command.
Phase 1: Discovery
------------------
Read all the disks on the system to find out:
- What are the LVM devices?
- What VGs exist on those devices?
This phase is called "label scan" (although it reads and scans everything,
not just the label.) It stores the information it discovers (what LVM
devices exist, and what VGs exist on them) in lvmcache. The devs/VGs info
in lvmcache is the starting point for phase two.
Phase 1 in outline:
For each device:
a. Read the first <N> KB of the device. (N is configurable.)
b. Look for the lvm label_header in the first four sectors;
if none exists, it's not an lvm device, so quit looking at it.
(By default, label_header is in the second sector.)
c. Look at the pv_header, which follows the label_header.
This tells us the location of VG metadata on the device.
There can be 0, 1 or 2 copies of VG metadata. The first
is always at the start of the device, the second (if used)
is at the end.
d. Look at the first mda_header (location came from pv_header
in the previous step). This is by default in sector 8,
4096 bytes from the start of the device. This tells us the
location of the actual VG metadata text.
e. Look at the first copy of the text VG metadata (location came
from mda_header in the previous step). This is by default
in sector 9, 4608 bytes from the start of the device.
The VG metadata is only partially analyzed to create a basic
summary of the VG.
f. Store an "info" entry in lvmcache for this device,
indicating that it is an lvm device, and store a "vginfo"
entry in lvmcache indicating the name of the VG seen
in the metadata in step e.
g. If the pv_header in step c shows a second mda_header
location at the end of the device, then read that as
in step d, and repeat steps e-f for it.
At the end of phase 1, lvmcache will have a list of devices
that belong to LVM, and a list of VG names that exist on
those devices. Each device (info struct) is associated
with the VG (vginfo struct) it is used in.
If the number <N> of KB read in step (a) was large enough, then
all the structs/metadata needed in steps b-e will be found
in the data buffer returned by step a.  If a particular struct
or piece of metadata needed in steps b-e is located outside the
range of the initial read, then those steps need to issue their own
read at the necessary location to get that bit of data.
(The optional second mda_header and VG metadata in step g
are located at the end of the device, and will always require
an additional read.)
Phase 1 in code:
The most relevant functions are listed for each step in the outline.
For each device:
lvmcache_label_scan()
label_scan()
_label_scan_async()
for each dev: dev = dev_iter_get(iter)
a. _label_read_async_start()
b. _label_read_data_process()
_find_label_header()
c. _label_read_data_process()
ops->read()
_text_read()
d. _read_mda_header_and_metadata()
raw_read_mda_header()
e. _read_mda_header_and_metadata()
read_metadata_location()
text_read_metadata_summary()
config_file_read_fd()
ops->read_vgsummary()
_read_vgsummary()
f. _text_read(): lvmcache_add()
[adds this device to list of lvm devices]
_read_mda_header_and_metadata(): lvmcache_update_vgname_and_id()
[adds the VG name to list of VGs]
Phase 1 in log messages:
For each device:
Scanning data from all devs async
a. Reading sectors from device <dev>
b. Parsing label and data from device <dev>
d. Copying mda header sector from <dev> ...
or if the mda_header needs to be read from disk:
Reading mda header sector from <dev> ...
e. Copying metadata summary for <dev> ...
or if the metadata needs to be read from disk:
Reading metadata summary for <dev> ...
f. lvmcache <dev> ...
Phase 2: Work
-------------
This phase carries out the operation requested by the command that was
run.
Whereas the first phase is based on iterating through each device on the
system, this phase is based on iterating through each VG name. The list
of VG names comes from phase 1, which stored the list in lvmcache to be
used by phase 2.
Some commands may need to iterate through all VG names, while others may
need to iterate through just one or two.
This phase includes locking each VG as work is done on it, so that two
commands do not interfere with each other.
Phase 2 in outline:
For each VG name:
a. Lock the VG.
b. Repeat the phase 1 scan steps for each device (PV) in this VG.
The phase 1 information in lvmcache may have changed because no VG lock
was held during phase 1. So, repeat the phase 1 steps, but only for the
devices in this VG.
c. Get the list of on-disk metadata locations for this VG.
Phase 1 created this list in lvmcache to be used here. At this
point we copy it out of lvmcache. In the simple/common case,
this is a list of devices in the VG. But, some devices may
have 0 or 2 metadata locations instead of the default 1, so it
is not always equal to the list of devices. We want to read
every copy of the metadata for this VG.
d. For each metadata location on each device in the VG
(the list from the previous step):
1) Look at the mda_header. The location of the mda_header was saved
in the lvmcache info struct by phase 1 (where it came from the
pv_header.) The mda_header tells us where the text VG metadata is
located.
2) Look at the text VG metadata. The location came from mda_header
in the previous step. The VG metadata is fully analyzed and used
to create an in-memory 'struct volume_group'.
Copying or reading the mda_header and VG metadata in steps d.1 and d.2
follows the same model as in phase 1: if the data read in scan step 2.b
covered these areas, then data is simply copied out of the buffer from
step 2.b, otherwise new reads are done.
e. Compare the copies of VG metadata that were found in each location.
If some copies are older, choose the newest one to use, and update
any older copies.
f. Update details about the devices/VG in lvmcache.
g. Pass the 'vg' struct to the command-specific code to work with.
Phase 2 in code:
The most relevant functions are listed for each step in the outline.
For each VG name:
process_each_vg()
a. vg_read()
lock_vol()
b. vg_read()
lvmcache_label_rescan_vg()
[insert phase 1 steps a-f]
c. vg_read()
create_instance()
_text_create_text_instance()
_create_vg_text_instance()
lvmcache_fid_add_mdas_vg()
[Copies mda locations from info->mdas where it was saved
by phase 1, into fid->metadata_areas_in_use. This is
the key connection between phase 1 and phase 2.]
d. dm_list_iterate_items(mda, &fid->metadata_areas_in_use)
d1. ops->vg_read()
_vg_read_raw()
raw_read_mda_header()
d2. _vg_read_raw()
text_read_metadata()
config_file_read_fd()
ops->read_vg()
_read_vg()
Phase 2 in log messages:
For each VG name:
Processing VG <name>
Reading VG <name>
b. Reading VG rereading labels for <name>
Scanning data from devs async
[insert log messages from phase 1 steps a-f]
Scanned data from <N> devs async
For each mda on each <dev> in the VG:
d. Reading VG <name> from <dev>
d.1. Copying|Reading mda header sector from <dev> ...
d.2. Copying|Reading metadata from <dev> ...
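
A standalone sketch of discovery step 1.b above. It is not lvm's
_find_label_header(); it only assumes the on-disk basics stated here:
512-byte sectors, the "LABELONE" magic at the start of the label_header,
and the label living in one of the first four sectors (sector 1 by default).

    #include <fcntl.h>
    #include <string.h>
    #include <unistd.h>

    #define SECTOR_SIZE 512

    /* Return the sector holding the lvm label, or -1 if none is found. */
    int find_lvm_label(const char *path)
    {
            char buf[4 * SECTOR_SIZE];
            int fd, s;

            if ((fd = open(path, O_RDONLY)) < 0)
                    return -1;
            if (pread(fd, buf, sizeof(buf), 0) != (ssize_t) sizeof(buf)) {
                    close(fd);
                    return -1;
            }
            close(fd);

            /* step b: the label_header must be in one of the first 4 sectors */
            for (s = 0; s < 4; s++)
                    if (!memcmp(buf + s * SECTOR_SIZE, "LABELONE", 8))
                            return s;       /* pv_header follows in this sector */

            return -1;                      /* not an lvm device */
    }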


@@ -28,6 +28,7 @@
#include "config.h"
#include "segtype.h"
#include "sharedlib.h"
#include "lvmcache.h"
#include <limits.h>
#include <fcntl.h>
@@ -2123,6 +2124,17 @@ static int _lv_suspend(struct cmd_context *cmd, const char *lvid_s,
if (!lv_info(cmd, lv, laopts->origin_only, &info, 0, 0))
goto_out;
/*
* Save old and new (current and precommitted) versions of the
* VG metadata for lv_resume() to use, since lv_resume can't
* read metadata given that devices are suspended. lv_resume()
* will resume LVs using the old/current metadata if the vg_commit
* did happen (or failed), and it will resume LVs using the
* new/precommitted metadata if the vg_commit succeeded.
*/
lvmcache_save_suspended_vg(lv->vg, 0);
lvmcache_save_suspended_vg(lv_pre->vg, 1);
if (!info.exists || info.suspended) {
if (!error_if_not_suspended) {
r = 1;
@@ -2281,15 +2293,54 @@ static int _lv_resume(struct cmd_context *cmd, const char *lvid_s,
struct lv_activate_opts *laopts, int error_if_not_active,
const struct logical_volume *lv)
{
const struct logical_volume *lv_to_free = NULL;
struct volume_group *vg = NULL;
struct logical_volume *lv_found = NULL;
const union lvid *lvid;
const char *vgid;
struct lvinfo info;
int r = 0;
if (!activation())
return 1;
if (!lv && !(lv_to_free = lv = lv_from_lvid(cmd, lvid_s, 0)))
goto_out;
/*
* When called in clvmd, lvid_s is set and lv is not. We need to
* get the VG metadata without reading disks because devs are
* suspended. lv_suspend() saved old and new VG metadata for us
* to use here. If vg_commit() happened, lvmcache_get_suspended_vg
* will return the new metadata for us to use in resuming LVs.
* If vg_commit() did not happen, lvmcache_get_suspended_vg
* returns the old metadata which we use to resume LVs.
*/
if (!lv && lvid_s) {
lvid = (const union lvid *) lvid_s;
vgid = (const char *)lvid->id[0].uuid;
if ((vg = lvmcache_get_suspended_vg(vgid))) {
log_debug_activation("Resuming LVID %s found saved vg seqno %d %s", lvid_s, vg->seqno, vg->name);
if ((lv_found = find_lv_in_vg_by_lvid(vg, lvid))) {
log_debug_activation("Resuming LVID %s found saved LV %s", lvid_s, display_lvname(lv_found));
lv = lv_found;
} else
log_debug_activation("Resuming LVID %s did not find saved LV", lvid_s);
} else
log_debug_activation("Resuming LVID %s did not find saved VG", lvid_s);
/*
* resume must have been called without a preceding suspend,
* so we need to read the vg.
*/
if (!lv) {
log_debug_activation("Resuming LVID %s reading VG", lvid_s);
if (!(lv_found = lv_from_lvid(cmd, lvid_s, 0))) {
log_debug_activation("Resuming LVID %s failed to read VG", lvid_s);
goto out;
}
lv = lv_found;
}
}
if (!lv_is_origin(lv) && !lv_is_thin_volume(lv) && !lv_is_thin_pool(lv))
laopts->origin_only = 0;
@@ -2334,9 +2385,6 @@ static int _lv_resume(struct cmd_context *cmd, const char *lvid_s,
r = 1;
out:
if (lv_to_free)
release_vg(lv_to_free->vg);
return r;
}
@@ -2463,6 +2511,10 @@ int lv_activation_filter(struct cmd_context *cmd, const char *lvid_s,
int *activate_lv, const struct logical_volume *lv)
{
const struct logical_volume *lv_to_free = NULL;
struct volume_group *vg = NULL;
struct logical_volume *lv_found = NULL;
const union lvid *lvid;
const char *vgid;
int r = 0;
if (!activation()) {
@@ -2470,6 +2522,24 @@ int lv_activation_filter(struct cmd_context *cmd, const char *lvid_s,
return 1;
}
/*
* This function is called while devices are suspended,
* so try to use the copy of the vg that was saved in
* lv_suspend.
*/
if (!lv && lvid_s) {
lvid = (const union lvid *) lvid_s;
vgid = (const char *)lvid->id[0].uuid;
if ((vg = lvmcache_get_suspended_vg(vgid))) {
log_debug_activation("activation_filter for %s found saved VG seqno %d %s", lvid_s, vg->seqno, vg->name);
if ((lv_found = find_lv_in_vg_by_lvid(vg, lvid))) {
log_debug_activation("activation_filter for %s found saved LV %s", lvid_s, display_lvname(lv_found));
lv = lv_found;
}
}
}
if (!lv && !(lv_to_free = lv = lv_from_lvid(cmd, lvid_s, 0)))
goto_out;

lib/cache/lvmcache.c vendored

File diff suppressed because it is too large

lib/cache/lvmcache.h vendored

@@ -59,6 +59,7 @@ struct lvmcache_vgsummary {
const char *lock_type;
uint32_t mda_checksum;
size_t mda_size;
int seqno;
};
int lvmcache_init(void);
@@ -74,6 +75,7 @@ void lvmcache_destroy(struct cmd_context *cmd, int retain_orphans, int reset);
*/
void lvmcache_force_next_label_scan(void);
int lvmcache_label_scan(struct cmd_context *cmd);
int lvmcache_label_rescan_vg(struct cmd_context *cmd, const char *vgname, const char *vgid);
/* Add/delete a device */
struct lvmcache_info *lvmcache_add(struct labeller *labeller, const char *pvid,
@@ -105,10 +107,8 @@ struct lvmcache_vginfo *lvmcache_vginfo_from_vgid(const char *vgid);
struct lvmcache_info *lvmcache_info_from_pvid(const char *pvid, struct device *dev, int valid_only);
const char *lvmcache_vgname_from_vgid(struct dm_pool *mem, const char *vgid);
const char *lvmcache_vgid_from_vgname(struct cmd_context *cmd, const char *vgname);
struct device *lvmcache_device_from_pvid(struct cmd_context *cmd, const struct id *pvid,
unsigned *scan_done_once, uint64_t *label_sector);
const char *lvmcache_pvid_from_devname(struct cmd_context *cmd,
const char *devname);
struct device *lvmcache_device_from_pvid(struct cmd_context *cmd, const struct id *pvid, uint64_t *label_sector);
const char *lvmcache_pvid_from_devname(struct cmd_context *cmd, const char *devname);
char *lvmcache_vgname_from_pvid(struct cmd_context *cmd, const char *pvid);
const char *lvmcache_vgname_from_info(struct lvmcache_info *info);
const struct format_type *lvmcache_fmt_from_info(struct lvmcache_info *info);
@@ -134,9 +134,6 @@ int lvmcache_get_vgnameids(struct cmd_context *cmd, int include_internal,
struct dm_list *lvmcache_get_pvids(struct cmd_context *cmd, const char *vgname,
const char *vgid);
/* Returns cached volume group metadata. */
struct volume_group *lvmcache_get_vg(struct cmd_context *cmd, const char *vgname,
const char *vgid, unsigned precommitted);
void lvmcache_drop_metadata(const char *vgname, int drop_precommitted);
void lvmcache_commit_metadata(const char *vgname);
@@ -215,4 +212,23 @@ void lvmcache_remove_unchosen_duplicate(struct device *dev);
int lvmcache_pvid_in_unchosen_duplicates(const char *pvid);
void lvmcache_save_suspended_vg(struct volume_group *vg, int precommitted);
struct volume_group *lvmcache_get_suspended_vg(const char *vgid);
void lvmcache_drop_suspended_vg(struct volume_group *vg);
int lvmcache_get_vg_devs(struct cmd_context *cmd,
struct lvmcache_vginfo *vginfo,
struct dm_list *devs);
void lvmcache_set_independent_location(const char *vgname);
void lvmcache_remove_defective_dev(struct device *dev);
int lvmcache_add_defective_dev(struct device *dev);
int lvmcache_dev_is_defective(struct device *dev);
int lvmcache_get_defective_devs(struct cmd_context *cmd, struct dm_list *head);
const struct format_type *lvmcache_get_fmt(struct cmd_context *cmd, const char *vgname, const char *vgid);
int lvmcache_update_vg_from_metadata(struct volume_group *vg, unsigned precommitted,
uint32_t meta_checksum, size_t meta_size);
#endif

lib/cache/lvmetad.c vendored

@@ -39,7 +39,7 @@ static int64_t _lvmetad_update_timeout;
static int _found_lvm1_metadata = 0;
static struct volume_group *_lvmetad_pvscan_vg(struct cmd_context *cmd, struct volume_group *vg);
static struct volume_group *_lvmetad_pvscan_vg(struct cmd_context *cmd, struct volume_group *vg, const char *vgid, struct format_type *fmt);
static uint64_t _monotonic_seconds(void)
{
@@ -1090,14 +1090,17 @@ struct volume_group *lvmetad_vg_lookup(struct cmd_context *cmd, const char *vgna
* invalidated the cached vg.
*/
if (rescan) {
if (!(vg2 = _lvmetad_pvscan_vg(cmd, vg))) {
if (!(vg2 = _lvmetad_pvscan_vg(cmd, vg, vgid, fmt))) {
log_debug_lvmetad("VG %s from lvmetad not found during rescan.", vgname);
fid = NULL;
release_vg(vg);
vg = NULL;
goto out;
}
fid->ref_count++;
release_vg(vg);
fid->ref_count--;
fmt->ops->destroy_instance(fid);
vg = vg2;
fid = vg2->fid;
}
@@ -1105,14 +1108,14 @@ struct volume_group *lvmetad_vg_lookup(struct cmd_context *cmd, const char *vgna
dm_list_iterate_items(pvl, &vg->pvs) {
if (!_pv_update_struct_pv(pvl->pv, fid)) {
vg = NULL;
goto_out; /* FIXME error path */
goto_out; /* FIXME: use an error path that disables lvmetad */
}
}
dm_list_iterate_items(pvl, &vg->pvs_outdated) {
if (!_pv_update_struct_pv(pvl->pv, fid)) {
vg = NULL;
goto_out; /* FIXME error path */
goto_out; /* FIXME: use an error path that disables lvmetad */
}
}
@@ -1756,6 +1759,7 @@ int lvmetad_pv_gone_by_dev(struct device *dev)
*/
struct _lvmetad_pvscan_baton {
struct cmd_context *cmd;
struct volume_group *vg;
struct format_instance *fid;
};
@@ -1763,12 +1767,21 @@ struct _lvmetad_pvscan_baton {
static int _lvmetad_pvscan_single(struct metadata_area *mda, void *baton)
{
struct _lvmetad_pvscan_baton *b = baton;
struct device *mda_dev = mda_get_device(mda);
struct label_read_data *ld;
struct volume_group *vg;
ld = get_label_read_data(b->cmd, mda_dev);
if (mda_is_ignored(mda) ||
!(vg = mda->ops->vg_read(b->fid, "", mda, NULL, NULL, 1)))
!(vg = mda->ops->vg_read(b->fid, "", mda, ld, 0, 0, NULL)))
return 1;
if (mda->read_failed_flags) {
release_vg(vg);
return 1;
}
/* FIXME Also ensure contents match etc. */
if (!b->vg || vg->seqno > b->vg->seqno)
b->vg = vg;
@@ -1778,6 +1791,42 @@ static int _lvmetad_pvscan_single(struct metadata_area *mda, void *baton)
return 1;
}
/*
* FIXME: handle errors and do proper comparison of metadata from each area
* like vg_read and fall back to real vg_read from disk if there's any problem.
*/
static int _lvmetad_pvscan_vg_single(struct metadata_area *mda, void *baton)
{
struct _lvmetad_pvscan_baton *b = baton;
struct device *mda_dev = mda_get_device(mda);
struct label_read_data *ld;
struct volume_group *vg = NULL;
if (mda_is_ignored(mda))
return 1;
ld = get_label_read_data(b->cmd, mda_dev);
if (!(vg = mda->ops->vg_read(b->fid, "", mda, ld, 0, 0, NULL)))
return 1;
if (mda->read_failed_flags) {
release_vg(vg);
return 1;
}
if (!b->vg)
b->vg = vg;
else if (vg->seqno > b->vg->seqno) {
release_vg(b->vg);
b->vg = vg;
} else
release_vg(vg);
return 1;
}
/*
* The lock manager may detect that the vg cached in lvmetad is out of date,
* due to something like an lvcreate from another host.
@@ -1787,41 +1836,41 @@ static int _lvmetad_pvscan_single(struct metadata_area *mda, void *baton)
* the VG, and that PV may have been reused for another VG.
*/
static struct volume_group *_lvmetad_pvscan_vg(struct cmd_context *cmd, struct volume_group *vg)
static struct volume_group *_lvmetad_pvscan_vg(struct cmd_context *cmd, struct volume_group *vg,
const char *vgid, struct format_type *fmt)
{
char pvid_s[ID_LEN + 1] __attribute__((aligned(8)));
char uuid[64] __attribute__((aligned(8)));
struct label *label;
struct volume_group *vg_ret = NULL;
struct dm_config_tree *vgmeta_ret = NULL;
struct dm_config_tree *vgmeta;
struct pv_list *pvl, *pvl_new;
struct device_list *devl, *devl_new, *devlsafe;
struct device_list *devl, *devlsafe;
struct dm_list pvs_scan;
struct dm_list pvs_drop;
struct dm_list pvs_new;
struct lvmcache_vginfo *vginfo = NULL;
struct lvmcache_info *info = NULL;
struct format_instance *fid;
struct format_instance_ctx fic = { .type = 0 };
struct _lvmetad_pvscan_baton baton;
struct volume_group *save_vg;
struct dm_config_tree *save_meta;
struct device *save_dev = NULL;
uint32_t save_seqno = 0;
int missing_devs = 0;
int check_new_pvs = 0;
int found_new_pvs = 0;
int retried_reads = 0;
int found;
save_vg = NULL;
save_meta = NULL;
save_dev = NULL;
save_seqno = 0;
dm_list_init(&pvs_scan);
dm_list_init(&pvs_drop);
dm_list_init(&pvs_new);
log_debug_lvmetad("Rescanning VG %s (seqno %u).", vg->name, vg->seqno);
log_debug_lvmetad("Rescan VG %s to update lvmetad (seqno %u).", vg->name, vg->seqno);
/*
* Another host may have added a PV to the VG, and some
* commands do not always populate their lvmcache with
* all devs from lvmetad, so they would fail to find
* the new PV when scanning the VG. So make sure this
* command knows about all PVs from lvmetad.
* Make sure this command knows about all PVs from lvmetad.
*/
lvmcache_seed_infos_from_lvmetad(cmd);
@@ -1836,54 +1885,111 @@ static struct volume_group *_lvmetad_pvscan_vg(struct cmd_context *cmd, struct v
dm_list_add(&pvs_scan, &devl->list);
}
scan_more:
/*
* Rescan labels/metadata only from devs that we previously
* saw in the VG. If we find below that there are new PVs
* in the VG, we'll have to rescan all devices to find which
* device(s) are now being used.
*/
log_debug_lvmetad("Rescan VG %s scanning data from devs in previous metadata.", vg->name);
label_scan_devs(cmd, &pvs_scan);
/*
* Run the equivalent of lvmetad_pvscan_single on each dev in the VG.
* Check if any pvs_scan entries are no longer PVs.
* In that case, label_read/_find_label_header will have
* found no label_header, and would have dropped the
* info struct for the device from lvmcache. So, if
* we look up the info struct here and don't find it,
* we can infer it's no longer a PV.
*
* FIXME: we should record specific results from the
* label_read and then check specifically for whatever
* result means "no label was found", rather than going
* about this indirectly via the lvmcache side effects.
*/
dm_list_iterate_items_safe(devl, devlsafe, &pvs_scan) {
if (!(info = lvmcache_info_from_pvid(devl->dev->pvid, devl->dev, 0))) {
/* Another host removed this PV from the VG. */
log_debug_lvmetad("Rescan VG %s from %s dropping dev (no label).",
vg->name, dev_name(devl->dev));
dm_list_move(&pvs_drop, &devl->list);
}
}
fic.type = FMT_INSTANCE_MDAS | FMT_INSTANCE_AUX_MDAS;
fic.context.vg_ref.vg_name = vg->name;
fic.context.vg_ref.vg_id = vgid;
retry_reads:
if (!(fid = fmt->ops->create_instance(fmt, &fic))) {
/* FIXME: are there only internal reasons for failures here? */
log_error("Reading VG %s failed to create format instance.", vg->name);
return NULL;
}
/* FIXME: not sure if this is necessary */
fid->ref_count++;
baton.fid = fid;
baton.cmd = cmd;
/*
* FIXME: this vg_read path does not have the ability to repair
* any problems with the VG, e.g. VG on one dev has an older
* seqno. When vg_read() is reworked, we need to fall back
* to using that from here (and vg_read's from lvmetad) when
* there is a problem. Perhaps by disabling lvmetad when a
* VG problem is detected, causing commands to fully fall
* back to disk, which will repair the VG. Then lvmetad can
* be repopulated and re-enabled (possibly automatically.)
*/
/*
* Do a low level vg_read on each dev, verify the vg returned
* from metadata on each device is for the VG being read
* (the PV may have been removed from the VG being read and
* added to a different one), and return this vg to the caller
* as the current vg to use.
*
* The label scan above will have saved in lvmcache which
* vg each device is used in, so we could figure that part
* out without doing the vg_read.
*/
dm_list_iterate_items_safe(devl, devlsafe, &pvs_scan) {
if (!devl->dev)
continue;
log_debug_lvmetad("Rescan VG %s scanning %s.", vg->name, dev_name(devl->dev));
if (!label_read(devl->dev, &label, 0)) {
/* Another host removed this PV from the VG. */
log_debug_lvmetad("Rescan VG %s found %s was removed.", vg->name, dev_name(devl->dev));
if ((info = lvmcache_info_from_pvid(devl->dev->pvid, NULL, 0)))
lvmcache_del(info);
log_debug_lvmetad("Rescan VG %s getting metadata from %s.",
vg->name, dev_name(devl->dev));
/*
* The info struct for this dev knows what and where
* the mdas are for this dev (the label scan saved
* the mda locations for this dev on the lvmcache info struct).
*/
if (!(info = lvmcache_info_from_pvid(devl->dev->pvid, devl->dev, 0))) {
log_debug_lvmetad("Rescan VG %s from %s dropping dev (no info).",
vg->name, dev_name(devl->dev));
dm_list_move(&pvs_drop, &devl->list);
continue;
}
info = (struct lvmcache_info *) label->info;
baton.vg = NULL;
baton.fid = lvmcache_fmt(info)->ops->create_instance(lvmcache_fmt(info), &fic);
if (!baton.fid)
return_NULL;
if (baton.fid->fmt->features & FMT_OBSOLETE) {
log_debug_lvmetad("Ignoring obsolete format on PV %s in VG %s.", dev_name(devl->dev), vg->name);
lvmcache_fmt(info)->ops->destroy_instance(baton.fid);
dm_list_move(&pvs_drop, &devl->list);
continue;
}
/*
* Read VG metadata from this dev's mdas.
*/
lvmcache_foreach_mda(info, _lvmetad_pvscan_single, &baton);
lvmcache_foreach_mda(info, _lvmetad_pvscan_vg_single, &baton);
/*
* The PV may have been removed from the VG by another host
* since we last read the VG.
*/
if (!baton.vg) {
log_debug_lvmetad("Rescan VG %s did not find %s.", vg->name, dev_name(devl->dev));
lvmcache_fmt(info)->ops->destroy_instance(baton.fid);
log_debug_lvmetad("Rescan VG %s from %s dropping dev (no metadata).",
vg->name, dev_name(devl->dev));
dm_list_move(&pvs_drop, &devl->list);
continue;
}
@@ -1893,10 +1999,15 @@ scan_more:
* different VG since we last read the VG.
*/
if (strcmp(baton.vg->name, vg->name)) {
log_debug_lvmetad("Rescan VG %s found different VG %s on PV %s.",
vg->name, baton.vg->name, dev_name(devl->dev));
log_debug_lvmetad("Rescan VG %s from %s dropping dev (other VG %s).",
vg->name, dev_name(devl->dev), baton.vg->name);
release_vg(baton.vg);
continue;
}
if (!(vgmeta = export_vg_to_config_tree(baton.vg))) {
log_error("VG export to config tree failed");
release_vg(baton.vg);
dm_list_move(&pvs_drop, &devl->list);
continue;
}
@@ -1906,20 +2017,35 @@ scan_more:
* read from each other dev.
*/
if (!save_seqno)
save_seqno = baton.vg->seqno;
if (save_vg && (save_seqno != baton.vg->seqno)) {
/* FIXME: fall back to vg_read to correct this. */
log_warn("WARNING: inconsistent metadata for VG %s on devices %s seqno %u and %s seqno %u.",
vg->name, dev_name(save_dev), save_seqno,
dev_name(devl->dev), baton.vg->seqno);
log_warn("WARNING: temporarily disable lvmetad to repair metadata.");
if (!(vgmeta = export_vg_to_config_tree(baton.vg))) {
log_error("VG export to config tree failed");
release_vg(baton.vg);
return NULL;
/* Use the most recent */
if (save_seqno < baton.vg->seqno) {
release_vg(save_vg);
dm_config_destroy(save_meta);
save_vg = baton.vg;
save_meta = vgmeta;
save_seqno = baton.vg->seqno;
save_dev = devl->dev;
} else {
release_vg(baton.vg);
dm_config_destroy(vgmeta);
}
continue;
}
if (!vgmeta_ret) {
vgmeta_ret = vgmeta;
if (!save_vg) {
save_vg = baton.vg;
save_meta = vgmeta;
save_seqno = baton.vg->seqno;
save_dev = devl->dev;
} else {
struct dm_config_node *meta1 = vgmeta_ret->root;
struct dm_config_node *meta1 = save_meta->root;
struct dm_config_node *meta2 = vgmeta->root;
struct dm_config_node *sib1 = meta1->sib;
struct dm_config_node *sib2 = meta2->sib;
@@ -1944,73 +2070,128 @@ scan_more:
meta2->sib = NULL;
if (compare_config(meta1, meta2)) {
/* FIXME: fall back to vg_read to correct this. */
log_warn("WARNING: inconsistent metadata for VG %s on devices %s seqno %u and %s seqno %u.",
vg->name, dev_name(save_dev), save_seqno,
dev_name(devl->dev), baton.vg->seqno);
log_warn("WARNING: temporarily disable lvmetad to repair metadata.");
log_error("VG %s metadata comparison failed for device %s vs %s",
vg->name, dev_name(devl->dev), save_dev ? dev_name(save_dev) : "none");
_log_debug_inequality(vg->name, vgmeta_ret->root, vgmeta->root);
_log_debug_inequality(vg->name, save_meta->root, vgmeta->root);
meta1->sib = sib1;
meta2->sib = sib2;
dm_config_destroy(vgmeta);
dm_config_destroy(vgmeta_ret);
/* no right choice, just use the previous copy */
release_vg(baton.vg);
return NULL;
dm_config_destroy(vgmeta);
}
meta1->sib = sib1;
meta2->sib = sib2;
release_vg(baton.vg);
dm_config_destroy(vgmeta);
}
}
/*
* Look for any new PVs in the VG metadata that were not in our
* previous version of the VG. Add them to pvs_new to be
* scanned in this loop just like the old PVs.
*/
if (!check_new_pvs) {
check_new_pvs = 1;
dm_list_iterate_items(pvl_new, &baton.vg->pvs) {
found = 0;
dm_list_iterate_items(pvl, &vg->pvs) {
if (pvl_new->pv->dev != pvl->pv->dev)
continue;
found = 1;
break;
}
if (found)
/* FIXME: see above */
fid->ref_count--;
/*
* Look for any new PVs in the VG metadata that were not in our
* previous version of the VG.
*
* (Don't look for new PVs after a rescan and retry.)
*/
found_new_pvs = 0;
if (save_vg && !retried_reads) {
dm_list_iterate_items(pvl_new, &save_vg->pvs) {
found = 0;
dm_list_iterate_items(pvl, &vg->pvs) {
if (pvl_new->pv->dev != pvl->pv->dev)
continue;
if (!pvl_new->pv->dev) {
strncpy(pvid_s, (char *) &pvl_new->pv->id, sizeof(pvid_s) - 1);
if (!id_write_format((const struct id *)&pvid_s, uuid, sizeof(uuid)))
stack;
log_error("Device not found for PV %s in VG %s", uuid, vg->name);
missing_devs++;
continue;
}
if (!(devl_new = dm_pool_zalloc(cmd->mem, sizeof(*devl_new))))
return_NULL;
devl_new->dev = pvl_new->pv->dev;
dm_list_add(&pvs_new, &devl_new->list);
log_debug_lvmetad("Rescan VG %s found %s was added.", vg->name, dev_name(devl_new->dev));
found = 1;
break;
}
/*
* PV in new VG metadata not found in old VG metadata.
* There's a good chance we don't know about this new
* PV or what device it's on; a label scan is needed
* of all devices so we know which device the VG is
* now using.
*/
if (!found) {
found_new_pvs++;
strncpy(pvid_s, (char *) &pvl_new->pv->id, sizeof(pvid_s) - 1);
if (!id_write_format((const struct id *)&pvid_s, uuid, sizeof(uuid)))
stack;
log_debug_lvmetad("Rescan VG %s found new PV %s.", vg->name, uuid);
}
}
}
release_vg(baton.vg);
if (!save_vg && retried_reads) {
log_error("VG %s not found after rescanning devices.", vg->name);
goto out;
}
/*
* Do the same scanning above for any new PVs.
* Do a full rescan of devices, then look up which devices the
* scan found for this VG name, and select those devices to
* read metadata from in the loop above (rather than the list
* of devices we created from our last copy of the vg metadata.)
*
* Case 1: VG we knew is no longer on any of the devices we knew it
* to be on (save_vg is NULL, which means the metadata wasn't found
* when reading mdas on each of the initial pvs_scan devices).
* Rescan all devs and then retry reading metadata from the devs that
* the scan finds associated with this VG.
*
* Case 2: VG has new PVs but we don't know what devices they are
* so rescan all devs and then retry reading metadata from the devs
* that the scan finds associated with this VG.
*
* (N.B. after a retry, we don't check for found_new_pvs.)
*/
if (!dm_list_empty(&pvs_new)) {
dm_list_init(&pvs_scan);
dm_list_splice(&pvs_scan, &pvs_new);
dm_list_init(&pvs_new);
log_debug_lvmetad("Rescan VG %s found new PVs to scan.", vg->name);
goto scan_more;
}
if (!save_vg || found_new_pvs) {
if (!save_vg)
log_debug_lvmetad("Rescan VG %s did not find VG on previous devs.", vg->name);
if (found_new_pvs)
log_debug_lvmetad("Rescan VG %s scanning all devs to find new PVs.", vg->name);
if (missing_devs) {
if (vgmeta_ret)
dm_config_destroy(vgmeta_ret);
return_NULL;
label_scan_force(cmd);
if (!(vginfo = lvmcache_vginfo_from_vgname(vg->name, NULL))) {
log_error("VG %s vg info not found after rescanning devices.", vg->name);
goto out;
}
/*
* Set pvs_scan to devs that the label scan found
* in the VG and retry the metadata reading loop.
*/
dm_list_init(&pvs_scan);
if (!lvmcache_get_vg_devs(cmd, vginfo, &pvs_scan)) {
log_error("VG %s info devs not found after rescanning devices.", vg->name);
goto out;
}
log_debug_lvmetad("Rescan VG %s has %d PVs after label scan.",
vg->name, dm_list_size(&pvs_scan));
if (save_vg)
release_vg(save_vg);
if (save_meta)
dm_config_destroy(save_meta);
save_vg = NULL;
save_meta = NULL;
save_dev = NULL;
save_seqno = 0;
found_new_pvs = 0;
retried_reads = 1;
goto retry_reads;
}
/*
@@ -2019,52 +2200,50 @@ scan_more:
dm_list_iterate_items(devl, &pvs_drop) {
if (!devl->dev)
continue;
log_debug_lvmetad("Rescan VG %s dropping %s.", vg->name, dev_name(devl->dev));
if (!lvmetad_pv_gone_by_dev(devl->dev))
return_NULL;
log_debug_lvmetad("Rescan VG %s removing %s from lvmetad.", vg->name, dev_name(devl->dev));
if (!lvmetad_pv_gone_by_dev(devl->dev)) {
/* FIXME: use an error path that disables lvmetad */
log_error("Failed to remove %s from lvmetad.", dev_name(devl->dev));
}
}
/*
* Update the VG in lvmetad.
* Update lvmetad with the newly read version of the VG.
* When the seqno is unchanged the cached VG can be left.
*/
if (vgmeta_ret) {
fid = lvmcache_fmt(info)->ops->create_instance(lvmcache_fmt(info), &fic);
if (!(vg_ret = import_vg_from_config_tree(vgmeta_ret, fid))) {
log_error("VG import from config tree failed");
lvmcache_fmt(info)->ops->destroy_instance(fid);
goto out;
if (save_vg && (save_seqno != vg->seqno)) {
dm_list_iterate_items(devl, &pvs_scan) {
if (!devl->dev)
continue;
log_debug_lvmetad("Rescan VG %s removing %s from lvmetad to replace.",
vg->name, dev_name(devl->dev));
if (!lvmetad_pv_gone_by_dev(devl->dev)) {
/* FIXME: use an error path that disables lvmetad */
log_error("Failed to remove %s from lvmetad.", dev_name(devl->dev));
}
}
log_debug_lvmetad("Rescan VG %s updating lvmetad from seqno %u to seqno %u.",
vg->name, vg->seqno, save_seqno);
/*
* Update lvmetad with the newly read version of the VG.
* When the seqno is unchanged the cached VG can be left.
* If this vg_update fails the cached metadata in
* lvmetad will remain invalid.
*/
if (save_seqno != vg->seqno) {
dm_list_iterate_items(devl, &pvs_scan) {
if (!devl->dev)
continue;
log_debug_lvmetad("Rescan VG %s dropping to replace %s.", vg->name, dev_name(devl->dev));
if (!lvmetad_pv_gone_by_dev(devl->dev))
return_NULL;
}
log_debug_lvmetad("Rescan VG %s updating lvmetad from seqno %u to seqno %u.",
vg->name, vg->seqno, save_seqno);
/*
* If this vg_update fails the cached metadata in
* lvmetad will remain invalid.
*/
vg_ret->lvmetad_update_pending = 1;
if (!lvmetad_vg_update_finish(vg_ret))
log_error("Failed to update lvmetad with new VG meta");
save_vg->lvmetad_update_pending = 1;
if (!lvmetad_vg_update_finish(save_vg)) {
/* FIXME: use an error path that disables lvmetad */
log_error("Failed to update lvmetad with new VG meta");
}
dm_config_destroy(vgmeta_ret);
}
out:
if (vg_ret)
log_debug_lvmetad("Rescan VG %s done (seqno %u).", vg_ret->name, vg_ret->seqno);
return vg_ret;
if (!save_vg && fid)
fmt->ops->destroy_instance(fid);
if (save_meta)
dm_config_destroy(save_meta);
if (save_vg)
log_debug_lvmetad("Rescan VG %s done (new seqno %u).", save_vg->name, save_vg->seqno);
return save_vg;
}
int lvmetad_pvscan_single(struct cmd_context *cmd, struct device *dev,
@@ -2074,9 +2253,12 @@ int lvmetad_pvscan_single(struct cmd_context *cmd, struct device *dev,
struct label *label;
struct lvmcache_info *info;
struct _lvmetad_pvscan_baton baton;
const struct format_type *fmt;
/* Create a dummy instance. */
struct format_instance_ctx fic = { .type = 0 };
log_debug_lvmetad("Scan metadata from dev %s", dev_name(dev));
if (!lvmetad_used()) {
log_error("Cannot proceed since lvmetad is not active.");
return 0;
@@ -2087,23 +2269,31 @@ int lvmetad_pvscan_single(struct cmd_context *cmd, struct device *dev,
return 1;
}
if (!label_read(dev, &label, 0)) {
log_print_unless_silent("No PV label found on %s.", dev_name(dev));
if (!(info = lvmcache_info_from_pvid(dev->pvid, dev, 0))) {
log_print_unless_silent("No PV info found on %s for PVID %s.", dev_name(dev), dev->pvid);
if (!lvmetad_pv_gone_by_dev(dev))
goto_bad;
return 1;
}
info = (struct lvmcache_info *) label->info;
if (!(label = lvmcache_get_label(info))) {
log_print_unless_silent("No PV label found for %s.", dev_name(dev));
if (!lvmetad_pv_gone_by_dev(dev))
goto_bad;
return 1;
}
fmt = lvmcache_fmt(info);
baton.cmd = cmd;
baton.vg = NULL;
baton.fid = lvmcache_fmt(info)->ops->create_instance(lvmcache_fmt(info), &fic);
baton.fid = fmt->ops->create_instance(fmt, &fic);
if (!baton.fid)
goto_bad;
if (baton.fid->fmt->features & FMT_OBSOLETE) {
lvmcache_fmt(info)->ops->destroy_instance(baton.fid);
if (fmt->features & FMT_OBSOLETE) {
fmt->ops->destroy_instance(baton.fid);
log_warn("WARNING: Disabling lvmetad cache which does not support obsolete (lvm1) metadata.");
lvmetad_set_disabled(cmd, LVMETAD_DISABLE_REASON_LVM1);
_found_lvm1_metadata = 1;
@@ -2117,9 +2307,9 @@ int lvmetad_pvscan_single(struct cmd_context *cmd, struct device *dev,
lvmcache_foreach_mda(info, _lvmetad_pvscan_single, &baton);
if (!baton.vg)
lvmcache_fmt(info)->ops->destroy_instance(baton.fid);
fmt->ops->destroy_instance(baton.fid);
if (!lvmetad_pv_found(cmd, (const struct id *) &dev->pvid, dev, lvmcache_fmt(info),
if (!lvmetad_pv_found(cmd, (const struct id *) &dev->pvid, dev, fmt,
label->sector, baton.vg, found_vgnames, changed_vgnames)) {
release_vg(baton.vg);
goto_bad;
@@ -2185,6 +2375,13 @@ int lvmetad_pvscan_all_devs(struct cmd_context *cmd, int do_wait)
replacing_other_update = 1;
}
label_scan(cmd);
if (lvmcache_found_duplicate_pvs()) {
log_warn("WARNING: Scan found duplicate PVs.");
return 0;
}
log_verbose("Scanning all devices to update lvmetad.");
if (!(iter = dev_iter_create(cmd->lvmetad_filter, 1))) {
@@ -2555,6 +2752,8 @@ void lvmetad_validate_global_cache(struct cmd_context *cmd, int force)
*/
_lvmetad_get_pv_cache_list(cmd, &pvc_before);
log_debug_lvmetad("Rescan all devices to validate global cache.");
/*
* Update the local lvmetad cache so it correctly reflects any
* changes made on remote hosts. (It's possible that this command
@@ -2623,7 +2822,7 @@ void lvmetad_validate_global_cache(struct cmd_context *cmd, int force)
_update_changed_pvs_in_udev(cmd, &pvc_before, &pvc_after);
}
log_debug_lvmetad("Validating global lvmetad cache finished");
log_debug_lvmetad("Rescanned all devices");
}
int lvmetad_vg_is_foreign(struct cmd_context *cmd, const char *vgname, const char *vgid)


@@ -542,6 +542,7 @@ static int _process_config(struct cmd_context *cmd)
const struct dm_config_value *cv;
int64_t pv_min_kb;
int udev_disabled = 0;
int scan_size;
char sysfs_dir[PATH_MAX];
if (!_check_config(cmd))
@@ -625,6 +626,29 @@ static int _process_config(struct cmd_context *cmd)
cmd->default_settings.udev_sync = udev_disabled ? 0 :
find_config_tree_bool(cmd, activation_udev_sync_CFG, NULL);
#ifdef AIO_SUPPORT
cmd->use_aio = find_config_tree_bool(cmd, devices_scan_async_CFG, NULL);
#else
cmd->use_aio = 0;
if (find_config_tree_bool(cmd, devices_scan_async_CFG, NULL))
log_verbose("Ignoring scan_async, no async I/O support.");
#endif
scan_size = find_config_tree_int(cmd, devices_scan_size_CFG, NULL);
if (!scan_size || (scan_size < 0)) {
log_warn("WARNING: Ignoring invalid metadata/scan_size %d, using default %u.",
scan_size, DEFAULT_SCAN_SIZE_KB);
scan_size = DEFAULT_SCAN_SIZE_KB;
}
if (cmd->use_aio && (scan_size % 4)) {
log_warn("WARNING: Ignoring invalid metadata/scan_size %d with scan_async, using default %u.",
scan_size, DEFAULT_SCAN_SIZE_KB);
scan_size = DEFAULT_SCAN_SIZE_KB;
}
cmd->default_settings.scan_size_kb = scan_size;
/*
* Set udev_fallback lazily on first use since it requires
* checking DM driver version which is an extra ioctl!
@@ -685,9 +709,6 @@ static int _process_config(struct cmd_context *cmd)
if (find_config_tree_bool(cmd, report_two_word_unknown_device_CFG, NULL))
init_unknown_device_name("unknown device");
init_detect_internal_vg_cache_corruption
(find_config_tree_bool(cmd, global_detect_internal_vg_cache_corruption_CFG, NULL));
if (!_init_system_id(cmd))
return_0;
@@ -1996,7 +2017,6 @@ struct cmd_context *create_toolcontext(unsigned is_long_lived,
if (set_filters && !init_filters(cmd, 1))
goto_out;
cmd->default_settings.cache_vgmetadata = 1;
cmd->current_settings = cmd->default_settings;
cmd->initialized.config = 1;
@@ -2226,6 +2246,9 @@ void destroy_toolcontext(struct cmd_context *cmd)
!cmd->filter->dump(cmd->filter, 1))
stack;
if (cmd->ac)
dev_async_context_destroy(cmd->ac);
archive_exit(cmd);
backup_exit(cmd);
lvmcache_destroy(cmd, 0, 0);

View File

@@ -39,7 +39,7 @@ struct config_info {
int udev_rules;
int udev_sync;
int udev_fallback;
int cache_vgmetadata;
int scan_size_kb;
const char *msg_prefix;
const char *fmt_name;
uint64_t unit_factor;
@@ -164,6 +164,8 @@ struct cmd_context {
unsigned vg_notify:1;
unsigned lv_notify:1;
unsigned pv_notify:1;
unsigned use_aio:1;
unsigned pvscan_cache_single:1;
/*
* Filtering.
@@ -223,6 +225,7 @@ struct cmd_context {
const char *time_format;
unsigned rand_seed;
struct dm_list unused_duplicate_devs; /* save preferences between lvmcache instances */
struct dev_async_context *ac; /* for async i/o */
};
/*

View File

@@ -23,6 +23,7 @@
#include "toolcontext.h"
#include "lvm-file.h"
#include "memlock.h"
#include "lvmcache.h"
#include <sys/stat.h>
#include <sys/mman.h>
@@ -494,10 +495,11 @@ int override_config_tree_from_profile(struct cmd_context *cmd,
* and function avoids parsing of mda into config tree which
* remains unmodified and should not be used.
*/
int config_file_read_fd(struct dm_config_tree *cft, struct device *dev,
int config_file_read_fd(struct dm_config_tree *cft, struct device *dev, char *buf_async,
off_t offset, size_t size, off_t offset2, size_t size2,
checksum_fn_t checksum_fn, uint32_t checksum,
int checksum_only, int no_dup_node_check)
int checksum_only, int no_dup_node_check,
uint64_t *failed_flags)
{
char *fb, *fe;
int r = 0;
@@ -510,6 +512,7 @@ int config_file_read_fd(struct dm_config_tree *cft, struct device *dev,
log_error(INTERNAL_ERROR "config_file_read_fd: expected file, special file "
"or profile config source, found %s config source.",
_config_source_names[cs->type]);
*failed_flags |= FAILED_INTERNAL;
return 0;
}
@@ -517,13 +520,25 @@ int config_file_read_fd(struct dm_config_tree *cft, struct device *dev,
if (!(dev->flags & DEV_REGULAR) || size2)
use_mmap = 0;
if (use_mmap) {
if (buf_async) {
if (!(buf = dm_malloc(size + size2))) {
log_error("Failed to allocate circular buffer.");
return 0;
}
memcpy(buf, buf_async + offset, size);
if (size2)
memcpy(buf + size, buf_async + offset2, size2);
fb = buf;
} else if (use_mmap) {
mmap_offset = offset % lvm_getpagesize();
/* memory map the file */
fb = mmap((caddr_t) 0, size + mmap_offset, PROT_READ,
MAP_PRIVATE, dev_fd(dev), offset - mmap_offset);
if (fb == (caddr_t) (-1)) {
log_sys_error("mmap", dev_name(dev));
*failed_flags |= FAILED_VG_METADATA_IO;
goto out;
}
fb = fb + mmap_offset;
@@ -532,8 +547,10 @@ int config_file_read_fd(struct dm_config_tree *cft, struct device *dev,
log_error("Failed to allocate circular buffer.");
return 0;
}
if (!dev_read_circular(dev, (uint64_t) offset, size,
(uint64_t) offset2, size2, buf)) {
*failed_flags |= FAILED_VG_METADATA_IO;
goto out;
}
fb = buf;
@@ -543,17 +560,22 @@ int config_file_read_fd(struct dm_config_tree *cft, struct device *dev,
(checksum_fn(checksum_fn(INITIAL_CRC, (const uint8_t *)fb, size),
(const uint8_t *)(fb + size), size2))) {
log_error("%s: Checksum error", dev_name(dev));
*failed_flags |= FAILED_VG_METADATA_CHECKSUM;
goto out;
}
if (!checksum_only) {
fe = fb + size + size2;
if (no_dup_node_check) {
if (!dm_config_parse_without_dup_node_check(cft, fb, fe))
if (!dm_config_parse_without_dup_node_check(cft, fb, fe)) {
*failed_flags |= FAILED_VG_METADATA_PARSE;
goto_out;
}
} else {
if (!dm_config_parse(cft, fb, fe))
if (!dm_config_parse(cft, fb, fe)) {
*failed_flags |= FAILED_VG_METADATA_PARSE;
goto_out;
}
}
}
@@ -566,6 +588,7 @@ int config_file_read_fd(struct dm_config_tree *cft, struct device *dev,
/* unmap the file */
if (munmap(fb - mmap_offset, size + mmap_offset)) {
log_sys_error("munmap", dev_name(dev));
*failed_flags |= FAILED_INTERNAL;
r = 0;
}
}
@@ -579,6 +602,7 @@ int config_file_read(struct dm_config_tree *cft)
struct config_source *cs = dm_config_get_custom(cft);
struct config_file *cf;
struct stat info;
uint64_t failed_flags = 0;
int r;
if (!config_file_check(cft, &filename, &info))
@@ -601,8 +625,8 @@ int config_file_read(struct dm_config_tree *cft)
}
}
r = config_file_read_fd(cft, cf->dev, 0, (size_t) info.st_size, 0, 0,
(checksum_fn_t) NULL, 0, 0, 0);
r = config_file_read_fd(cft, cf->dev, NULL, 0, (size_t) info.st_size, 0, 0,
(checksum_fn_t) NULL, 0, 0, 0, &failed_flags);
if (!cf->keep_open) {
if (!dev_close(cf->dev))

View File

@@ -239,10 +239,10 @@ config_source_t config_get_source_type(struct dm_config_tree *cft);
typedef uint32_t (*checksum_fn_t) (uint32_t initial, const uint8_t *buf, uint32_t size);
struct dm_config_tree *config_open(config_source_t source, const char *filename, int keep_open);
int config_file_read_fd(struct dm_config_tree *cft, struct device *dev,
int config_file_read_fd(struct dm_config_tree *cft, struct device *dev, char *buf_async,
off_t offset, size_t size, off_t offset2, size_t size2,
checksum_fn_t checksum_fn, uint32_t checksum,
int skip_parse, int no_dup_node_check);
int skip_parse, int no_dup_node_check, uint64_t *failed_flags);
int config_file_read(struct dm_config_tree *cft);
struct dm_config_tree *config_file_open_and_read(const char *config_file, config_source_t source,
struct cmd_context *cmd);
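As a rough illustration (not code from this patch), a caller of the widened config_file_read_fd() might look like the sketch below: passing NULL for buf_async forces a synchronous read from the device, a non-NULL buffer (e.g. one filled by the label scan) is copied from instead, and failed_flags reports why a read was rejected. The cft, dev, offset/size/checksum variables are placeholders.

        uint64_t failed_flags = 0;

        /* NULL buf_async: read synchronously from dev; a buffer filled by
         * the label scan could be passed instead to avoid the device read. */
        if (!config_file_read_fd(cft, dev, NULL, offset, size, offset2, size2,
                                 calc_crc, checksum, 0, 0, &failed_flags)) {
                if (failed_flags & FAILED_VG_METADATA_CHECKSUM)
                        log_error("Metadata checksum mismatch on %s.", dev_name(dev));
                else if (failed_flags & FAILED_VG_METADATA_PARSE)
                        log_error("Metadata on %s could not be parsed.", dev_name(dev));
                else
                        log_error("Metadata read on %s failed (flags 0x%llx).",
                                  dev_name(dev), (unsigned long long)failed_flags);
        }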

View File

@@ -457,6 +457,26 @@ cfg(devices_allow_changes_with_duplicate_pvs_CFG, "allow_changes_with_duplicate_
"Enabling this setting allows the VG to be used as usual even with\n"
"uncertain devices.\n")
cfg(devices_scan_async_CFG, "scan_async", devices_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_BOOL, DEFAULT_SCAN_ASYNC, vsn(2, 2, 173), NULL, 0, NULL,
"Use async I/O to read headers and metadata from disks in parallel.\n")
cfg(devices_scan_size_CFG, "scan_size", devices_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_INT, DEFAULT_SCAN_SIZE_KB, vsn(2, 2, 173), NULL, 0, NULL,
"Number of KiB to read from each disk when scanning disks.\n"
"The initial scan size is intended to cover all the headers\n"
"and metadata that LVM places at the start of each disk so\n"
"that a single read operation can retrieve them all.\n"
"Any headers or metadata that lie beyond this size require\n"
"an additional disk read.\n")
cfg(devices_async_events_CFG, "async_events", devices_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_INT, DEFAULT_ASYNC_EVENTS, vsn(2, 2, 173), NULL, 0, NULL,
"Max number of concurrent async reads when scanning disks.\n"
"Up to this many disks can be read concurrently when scanning\n"
"disks with async I/O. If there are more disks than this,\n"
"they will be scanned serially with synchronous reads.\n"
"Increasing this number to match a larger number of disks may\n"
"improve performance, but will increase memory requirements.\n"
"This setting is limitted by the system aio configuration.\n")
cfg_array(allocation_cling_tag_list_CFG, "cling_tag_list", allocation_CFG_SECTION, CFG_DEFAULT_UNDEFINED, CFG_TYPE_STRING, NULL, vsn(2, 2, 77), NULL, 0, NULL,
"Advise LVM which PVs to use when searching for new space.\n"
"When searching for free space to extend an LV, the 'cling' allocation\n"
@@ -868,11 +888,8 @@ cfg(global_abort_on_internal_errors_CFG, "abort_on_internal_errors", global_CFG_
"Treat any internal errors as fatal errors, aborting the process that\n"
"encountered the internal error. Please only enable for debugging.\n")
cfg(global_detect_internal_vg_cache_corruption_CFG, "detect_internal_vg_cache_corruption", global_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_DETECT_INTERNAL_VG_CACHE_CORRUPTION, vsn(2, 2, 96), NULL, 0, NULL,
"Internal verification of VG structures.\n"
"Check if CRC matches when a parsed VG is used multiple times. This\n"
"is useful to catch unexpected changes to cached VG structures.\n"
"Please only enable for debugging.\n")
cfg(global_detect_internal_vg_cache_corruption_CFG, "detect_internal_vg_cache_corruption", global_CFG_SECTION, 0, CFG_TYPE_BOOL, 0, vsn(2, 2, 96), NULL, vsn(2, 2, 174), NULL,
"No longer used.\n")
cfg(global_metadata_read_only_CFG, "metadata_read_only", global_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_METADATA_READ_ONLY, vsn(2, 2, 75), NULL, 0, NULL,
"No operations that change on-disk metadata are permitted.\n"

View File

@@ -60,6 +60,10 @@
#define DEFAULT_LVDISPLAY_SHOWS_FULL_DEVICE_PATH 0
#define DEFAULT_UNKNOWN_DEVICE_NAME "[unknown]"
#define DEFAULT_SCAN_ASYNC 1
#define DEFAULT_SCAN_SIZE_KB 128
#define DEFAULT_ASYNC_EVENTS 100
#define DEFAULT_SANLOCK_LV_EXTEND_MB 256
#define DEFAULT_MIRRORLOG MIRROR_LOG_DISK
@@ -179,7 +183,6 @@
#define DEFAULT_LOGLEVEL 0
#define DEFAULT_INDENT 1
#define DEFAULT_ABORT_ON_INTERNAL_ERRORS 0
#define DEFAULT_DETECT_INTERNAL_VG_CACHE_CORRUPTION 0
#define DEFAULT_UNITS "r"
#define DEFAULT_SUFFIX 1
#define DEFAULT_HOSTTAGS 0

View File

@@ -1081,6 +1081,8 @@ static void _full_scan(int dev_scan)
if (_cache.has_scanned && !dev_scan)
return;
log_debug_devs("Adding device paths to dev cache");
_insert_dirs(&_cache.dirs);
(void) dev_cache_index_devs();
@@ -1090,6 +1092,8 @@ static void _full_scan(int dev_scan)
_cache.has_scanned = 1;
init_full_scan_done(1);
log_debug_devs("Added %d device paths to dev cache", dm_hash_get_num_entries(_cache.names));
}
int dev_cache_has_scanned(void)
@@ -1558,3 +1562,61 @@ const char *dev_name(const struct device *dev)
return (dev && dev->aliases.n) ? dm_list_item(dev->aliases.n, struct dm_str_list)->str :
unknown_device_name();
}
int device_list_remove(struct dm_list *devices, struct device *dev)
{
struct device_id_list *dil;
dm_list_iterate_items(dil, devices) {
if (dil->dev == dev) {
dm_list_del(&dil->list);
return 1;
}
}
return 0;
}
struct device_id_list *device_list_find_dev(struct dm_list *devices, struct device *dev)
{
struct device_id_list *dil;
dm_list_iterate_items(dil, devices) {
if (dil->dev == dev)
return dil;
}
return NULL;
}
struct device_id_list *device_list_find_pvid(struct dm_list *devices, const char *pvid)
{
struct device_id_list *dil;
dm_list_iterate_items(dil, devices) {
if (!strncmp(dil->pvid, pvid, ID_LEN))
return dil;
}
return NULL;
}
int device_list_copy(struct cmd_context *cmd, struct dm_list *src, struct dm_list *dst)
{
struct device_id_list *dil;
struct device_id_list *dil_new;
dm_list_iterate_items(dil, src) {
if (!(dil_new = dm_pool_alloc(cmd->mem, sizeof(*dil_new)))) {
log_error("device_id_list alloc failed.");
return 0;
}
dil_new->dev = dil->dev;
strncpy(dil_new->pvid, dil->pvid, ID_LEN);
dm_list_add(dst, &dil_new->list);
}
return 1;
}
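A small usage sketch for these helpers, not taken from the patch: it assumes an existing cmd and a pvid string, and uses cmd->unused_duplicate_devs purely as an example source list.

        struct dm_list my_devs;
        struct device_id_list *dil;

        dm_list_init(&my_devs);

        /* Take a private copy of the source list. */
        if (!device_list_copy(cmd, &cmd->unused_duplicate_devs, &my_devs))
                return 0;

        /* Look up one entry by PVID and drop it from the copy. */
        if ((dil = device_list_find_pvid(&my_devs, pvid)) &&
            device_list_remove(&my_devs, dil->dev))
                log_debug("Dropped %s for PVID %s from the copied list.",
                          dev_name(dil->dev), pvid);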

View File

@@ -72,4 +72,10 @@ struct device *dev_iter_get(struct dev_iter *iter);
void dev_reset_error_count(struct cmd_context *cmd);
/* device_id_list elements */
int device_list_remove(struct dm_list *devices, struct device *dev);
struct device_id_list *device_list_find_dev(struct dm_list *devices, struct device *dev);
struct device_id_list *device_list_find_pvid(struct dm_list *devices, const char *pvid);
int device_list_copy(struct cmd_context *cmd, struct dm_list *src, struct dm_list *dst);
#endif

View File

@@ -827,3 +827,172 @@ int dev_set(struct device *dev, uint64_t offset, size_t len, int value)
return (len == 0);
}
#ifdef AIO_SUPPORT
/* io_setup() wrapper */
struct dev_async_context *dev_async_context_setup(unsigned async_event_count)
{
struct dev_async_context *ac;
unsigned nr_events = DEFAULT_ASYNC_EVENTS;
int error;
if (async_event_count)
nr_events = async_event_count;
if (!(ac = malloc(sizeof(struct dev_async_context))))
return_NULL;
memset(ac, 0, sizeof(struct dev_async_context));
error = io_setup(nr_events, &ac->aio_ctx);
if (error < 0) {
log_warn("WARNING: async io setup error %d with %u events.", error, nr_events);
free(ac);
return_NULL;
}
return ac;
}
struct dev_async_io *dev_async_io_alloc(int buf_len)
{
struct dev_async_io *aio;
char *buf;
char **p_buf;
/*
* A mem pool is not used here because the buffer needs the
* page alignment that posix_memalign() below provides.
*/
if (!(aio = malloc(sizeof(struct dev_async_io))))
return_NULL;
memset(aio, 0, sizeof(struct dev_async_io));
buf = NULL;
p_buf = &buf;
if (posix_memalign((void *)p_buf, getpagesize(), buf_len)) {
free(aio);
return_NULL;
}
memset(buf, 0, buf_len);
aio->buf = buf;
aio->buf_len = buf_len;
return aio;
}
void dev_async_context_destroy(struct dev_async_context *ac)
{
io_destroy(ac->aio_ctx);
free(ac);
}
void dev_async_io_destroy(struct dev_async_io *aio)
{
if (aio->buf)
free(aio->buf);
free(aio);
}
/* io_submit() wrapper */
int dev_async_read_submit(struct dev_async_context *ac, struct dev_async_io *aio,
struct device *dev, uint32_t len, uint64_t offset, int *nospace)
{
struct iocb *iocb = &aio->iocb;
int error;
*nospace = 0;
if (len > aio->buf_len)
return_0;
aio->len = len;
iocb->data = aio;
iocb->aio_fildes = dev_fd(dev);
iocb->aio_lio_opcode = IO_CMD_PREAD;
iocb->u.c.buf = aio->buf;
iocb->u.c.nbytes = len;
iocb->u.c.offset = offset;
error = io_submit(ac->aio_ctx, 1, &iocb);
if (error == -EAGAIN)
*nospace = 1;
if (error < 0)
return 0;
return 1;
}
/* io_getevents() wrapper */
int dev_async_getevents(struct dev_async_context *ac, int wait_count, struct timespec *timeout)
{
int wait_nr;
int rv;
int i;
retry:
memset(&ac->events, 0, sizeof(ac->events));
if (wait_count >= MAX_GET_EVENTS)
wait_nr = MAX_GET_EVENTS;
else
wait_nr = wait_count;
rv = io_getevents(ac->aio_ctx, 1, wait_nr, (struct io_event *)&ac->events, timeout);
if (rv == -EINTR)
goto retry;
if (rv < 0)
return 0;
if (!rv)
return 1;
for (i = 0; i < rv; i++) {
struct iocb *iocb = ac->events[i].obj;
struct dev_async_io *aio = iocb->data;
aio->result = ac->events[i].res;
aio->done = 1;
}
return 1;
}
#else /* AIO_SUPPORT */
struct dev_async_context *dev_async_context_setup(unsigned async_event_count)
{
return NULL;
}
struct dev_async_io *dev_async_io_alloc(int buf_len)
{
return NULL;
}
void dev_async_context_destroy(struct dev_async_context *ac)
{
}
void dev_async_io_destroy(struct dev_async_io *aio)
{
}
int dev_async_read_submit(struct dev_async_context *ac, struct dev_async_io *aio,
struct device *dev, uint32_t len, uint64_t offset, int *nospace)
{
return 0;
}
int dev_async_getevents(struct dev_async_context *ac, int wait_count, struct timespec *timeout)
{
return 0;
}
#endif /* AIO_SUPPORT */
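To show how these wrappers are meant to chain together, here is a minimal sketch (not from the patch) that reads 64 KiB from the start of an already-opened struct device *dev; error handling and partial-result cases are reduced to the bare minimum.

        struct dev_async_context *ac;
        struct dev_async_io *aio;
        int nospace = 0;

        /* 0 lets io_setup() use DEFAULT_ASYNC_EVENTS. */
        if (!(ac = dev_async_context_setup(0)))
                return 0;

        if (!(aio = dev_async_io_alloc(64 * 1024))) {
                dev_async_context_destroy(ac);
                return 0;
        }

        /* Queue one 64 KiB read from offset 0 of the device. */
        if (!dev_async_read_submit(ac, aio, dev, 64 * 1024, 0, &nospace))
                goto out;       /* nospace set means io_submit returned -EAGAIN */

        /* Wait for at least one completion. */
        if (!dev_async_getevents(ac, 1, NULL))
                goto out;

        if (aio->done && aio->result == (int) aio->len)
                log_debug("Async read of %u bytes completed, data is in aio->buf.", aio->len);
out:
        dev_async_io_destroy(aio);
        dev_async_context_destroy(ac);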

View File

@@ -19,6 +19,7 @@
#include "uuid.h"
#include <fcntl.h>
#include <libaio.h>
#define DEV_ACCESSED_W 0x00000001 /* Device written to? */
#define DEV_REGULAR 0x00000002 /* Regular file? */
@@ -84,12 +85,40 @@ struct device_list {
struct device *dev;
};
struct device_id_list {
struct dm_list list;
struct device *dev;
char pvid[ID_LEN + 1];
};
struct device_area {
struct device *dev;
uint64_t start; /* Bytes */
uint64_t size; /* Bytes */
};
/*
* We'll collect the results of this many async reads
* in one system call. It shouldn't matter much what
* number is used here.
*/
#define MAX_GET_EVENTS 16
struct dev_async_context {
io_context_t aio_ctx;
struct io_event events[MAX_GET_EVENTS];
};
struct dev_async_io {
char *buf;
struct iocb iocb;
struct device *dev;
uint32_t buf_len; /* size of buf */
uint32_t len; /* size of submitted io */
int done;
int result;
};
/*
* Support for external device info.
*/
@@ -144,4 +173,12 @@ void dev_destroy_file(struct device *dev);
/* Return a valid device name from the alias list; NULL otherwise */
const char *dev_name_confirmed(struct device *dev, int quiet);
struct dev_async_context *dev_async_context_setup(unsigned async_event_count);
struct dev_async_io *dev_async_io_alloc(int buf_len);
void dev_async_context_destroy(struct dev_async_context *ac);
void dev_async_io_destroy(struct dev_async_io *aio);
int dev_async_read_submit(struct dev_async_context *ac, struct dev_async_io *aio,
struct device *dev, uint32_t len, uint64_t offset, int *nospace);
int dev_async_getevents(struct dev_async_context *ac, int wait_count, struct timespec *timeout);
#endif

View File

@@ -180,9 +180,10 @@ out:
static struct volume_group *_format1_vg_read(struct format_instance *fid,
const char *vg_name,
struct metadata_area *mda __attribute__((unused)),
struct cached_vg_fmtdata **vg_fmtdata __attribute__((unused)),
unsigned *use_previous_vg __attribute__((unused)),
int single_device __attribute__((unused)))
struct label_read_data *ld __attribute__((unused)),
uint32_t last_meta_checksum __attribute__((unused)),
size_t last_meta_size __attribute__((unused)),
unsigned *last_meta_matches __attribute__((unused)))
{
struct volume_group *vg;
struct disk_list *dl;

View File

@@ -55,7 +55,8 @@ static int _lvm1_write(struct label *label __attribute__((unused)), void *buf __
}
static int _lvm1_read(struct labeller *l, struct device *dev, void *buf,
struct label **label)
struct label_read_data *ld,
struct label **label, uint64_t *failed_flags)
{
struct pv_disk *pvd = (struct pv_disk *) buf;
struct vg_disk vgd;

View File

@@ -101,9 +101,10 @@ static int _check_usp(const char *vgname, struct user_subpool *usp, int sp_count
static struct volume_group *_pool_vg_read(struct format_instance *fid,
const char *vg_name,
struct metadata_area *mda __attribute__((unused)),
struct cached_vg_fmtdata **vg_fmtdata __attribute__((unused)),
unsigned *use_previous_vg __attribute__((unused)),
int single_device __attribute__((unused)))
struct label_read_data *ld __attribute__((unused)),
uint32_t last_meta_checksum __attribute__((unused)),
size_t last_meta_size __attribute__((unused)),
unsigned *last_meta_matches __attribute__((unused)))
{
struct volume_group *vg;
struct user_subpool *usp;

View File

@@ -56,7 +56,8 @@ static int _pool_write(struct label *label __attribute__((unused)), void *buf __
}
static int _pool_read(struct labeller *l, struct device *dev, void *buf,
struct label **label)
struct label_read_data *ld,
struct label **label, uint64_t *failed_flags)
{
struct pool_list pl;

View File

@@ -321,7 +321,7 @@ static void _display_archive(struct cmd_context *cmd, struct archive_file *af)
* retrieve the archive time and description.
*/
/* FIXME Use variation on _vg_read */
if (!(vg = text_vg_import_file(tf, af->path, &when, &desc))) {
if (!(vg = text_read_metadata_file(tf, af->path, &when, &desc))) {
log_error("Unable to read archive file.");
tf->fmt->ops->destroy_instance(tf);
return;

View File

@@ -320,7 +320,7 @@ struct volume_group *backup_read_vg(struct cmd_context *cmd,
}
dm_list_iterate_items(mda, &tf->metadata_areas_in_use) {
if (!(vg = mda->ops->vg_read(tf, vg_name, mda, NULL, NULL, 0)))
if (!(vg = mda->ops->vg_read(tf, vg_name, mda, NULL, 0, 0, NULL)))
stack;
break;
}

View File

@@ -179,6 +179,7 @@ static int _pv_analyze_mda_raw (const struct format_type * fmt,
char *buf=NULL;
struct device_area *area;
struct mda_context *mdac;
uint64_t failed_flags = 0;
int r=0;
mdac = (struct mda_context *) mda->metadata_locn;
@@ -190,7 +191,7 @@ static int _pv_analyze_mda_raw (const struct format_type * fmt,
if (!dev_open_readonly(area->dev))
return_0;
if (!(mdah = raw_read_mda_header(fmt, area)))
if (!(mdah = raw_read_mda_header(fmt, area, NULL, &failed_flags)))
goto_out;
rlocn = mdah->raw_locns;
@@ -316,19 +317,35 @@ static void _xlate_mdah(struct mda_header *mdah)
}
}
static int _raw_read_mda_header(struct mda_header *mdah, struct device_area *dev_area)
static int _raw_read_mda_header(struct mda_header *mdah, struct device_area *dev_area,
struct label_read_data *ld, uint64_t *failed_flags)
{
if (!dev_open_readonly(dev_area->dev))
return_0;
if (!dev_read(dev_area->dev, dev_area->start, MDA_HEADER_SIZE, mdah)) {
if (!dev_close(dev_area->dev))
stack;
if (!dev_open_readonly(dev_area->dev)) {
*failed_flags |= FAILED_INTERNAL;
return_0;
}
if (!dev_close(dev_area->dev))
if (!ld || (ld->buf_len < dev_area->start + MDA_HEADER_SIZE)) {
log_debug_metadata("Reading mda header sector from %s at %llu",
dev_name(dev_area->dev), (unsigned long long)dev_area->start);
if (!dev_read(dev_area->dev, dev_area->start, MDA_HEADER_SIZE, mdah)) {
if (!dev_close(dev_area->dev))
stack;
*failed_flags |= FAILED_MDA_HEADER_IO;
return_0;
}
} else {
log_debug_metadata("Copying mda header sector from %s buffer at %llu",
dev_name(dev_area->dev), (unsigned long long)dev_area->start);
memcpy(mdah, ld->buf + dev_area->start, MDA_HEADER_SIZE);
}
if (!dev_close(dev_area->dev)) {
*failed_flags |= FAILED_INTERNAL;
return_0;
}
if (mdah->checksum_xl != xlate32(calc_crc(INITIAL_CRC, (uint8_t *)mdah->magic,
MDA_HEADER_SIZE -
@@ -336,6 +353,7 @@ static int _raw_read_mda_header(struct mda_header *mdah, struct device_area *dev
log_error("Incorrect metadata area header checksum on %s"
" at offset %"PRIu64, dev_name(dev_area->dev),
dev_area->start);
*failed_flags |= FAILED_MDA_HEADER_CHECKSUM;
return 0;
}
@@ -345,6 +363,7 @@ static int _raw_read_mda_header(struct mda_header *mdah, struct device_area *dev
log_error("Wrong magic number in metadata area header on %s"
" at offset %"PRIu64, dev_name(dev_area->dev),
dev_area->start);
*failed_flags |= FAILED_MDA_HEADER_FIELD;
return 0;
}
@@ -352,6 +371,7 @@ static int _raw_read_mda_header(struct mda_header *mdah, struct device_area *dev
log_error("Incompatible metadata area header version: %d on %s"
" at offset %"PRIu64, mdah->version,
dev_name(dev_area->dev), dev_area->start);
*failed_flags |= FAILED_MDA_HEADER_FIELD;
return 0;
}
@@ -359,6 +379,7 @@ static int _raw_read_mda_header(struct mda_header *mdah, struct device_area *dev
log_error("Incorrect start sector in metadata area header: %"
PRIu64" on %s at offset %"PRIu64, mdah->start,
dev_name(dev_area->dev), dev_area->start);
*failed_flags |= FAILED_MDA_HEADER_FIELD;
return 0;
}
@@ -366,16 +387,19 @@ static int _raw_read_mda_header(struct mda_header *mdah, struct device_area *dev
}
struct mda_header *raw_read_mda_header(const struct format_type *fmt,
struct device_area *dev_area)
struct device_area *dev_area,
struct label_read_data *ld,
uint64_t *failed_flags)
{
struct mda_header *mdah;
if (!(mdah = dm_pool_alloc(fmt->cmd->mem, MDA_HEADER_SIZE))) {
log_error("struct mda_header allocation failed");
*failed_flags |= FAILED_INTERNAL;
return NULL;
}
if (!_raw_read_mda_header(mdah, dev_area)) {
if (!_raw_read_mda_header(mdah, dev_area, ld, failed_flags)) {
dm_pool_free(fmt->cmd->mem, mdah);
return NULL;
}
@@ -402,20 +426,21 @@ static int _raw_write_mda_header(const struct format_type *fmt,
return 1;
}
static struct raw_locn *_find_vg_rlocn(struct device_area *dev_area,
/*
* FIXME: unify this with read_metadata_location_summary() which is used
* in the label scanning path.
*/
static struct raw_locn *_read_metadata_location_vg(struct device_area *dev_area,
struct mda_header *mdah,
struct label_read_data *ld,
const char *vgname,
int *precommitted)
int *precommitted,
uint64_t *failed_flags)
{
size_t len;
char vgnamebuf[NAME_LEN + 2] __attribute__((aligned(8)));
struct raw_locn *rlocn, *rlocn_precommitted;
struct lvmcache_info *info;
struct lvmcache_vgsummary vgsummary_orphan = {
.vgname = FMT_TEXT_ORPHAN_VG_NAME,
};
memcpy(&vgsummary_orphan.vgid, FMT_TEXT_ORPHAN_VG_NAME, sizeof(FMT_TEXT_ORPHAN_VG_NAME));
rlocn = mdah->raw_locns; /* Slot 0 */
rlocn_precommitted = rlocn + 1; /* Slot 1 */
@@ -427,9 +452,14 @@ static struct raw_locn *_find_vg_rlocn(struct device_area *dev_area,
} else
*precommitted = 0;
/* FIXME: why does read_metadata_location_summary() only check for zero offset? */
/* Do not check non-existent metadata. */
if (!rlocn->offset && !rlocn->size)
if (!rlocn->offset && !rlocn->size) {
log_error("Metadata location on %s at %"PRIu64" has zero offset and size.",
dev_name(dev_area->dev), dev_area->start);
*failed_flags |= FAILED_MDA_HEADER_RLOCN;
return NULL;
}
/*
* Don't try to check existing metadata
@@ -438,25 +468,31 @@ static struct raw_locn *_find_vg_rlocn(struct device_area *dev_area,
if (!*vgname)
return rlocn;
/* FIXME Loop through rlocns two-at-a-time. List null-terminated. */
/* FIXME Ignore if checksum incorrect!!! */
if (!dev_read(dev_area->dev, dev_area->start + rlocn->offset,
sizeof(vgnamebuf), vgnamebuf))
goto_bad;
/*
* Verify that the VG metadata pointed to by the rlocn
* begins with a valid vgname.
*/
if (!ld || (ld->buf_len < dev_area->start + rlocn->offset + NAME_LEN)) {
/* FIXME Loop through rlocns two-at-a-time. List null-terminated. */
/* FIXME Ignore if checksum incorrect!!! */
if (!dev_read(dev_area->dev, dev_area->start + rlocn->offset,
sizeof(vgnamebuf), vgnamebuf)) {
*failed_flags |= FAILED_VG_METADATA_IO;
return NULL;
}
} else {
memset(vgnamebuf, 0, sizeof(vgnamebuf));
memcpy(vgnamebuf, ld->buf + dev_area->start + rlocn->offset, NAME_LEN);
}
if (!strncmp(vgnamebuf, vgname, len = strlen(vgname)) &&
(isspace(vgnamebuf[len]) || vgnamebuf[len] == '{'))
return rlocn;
log_debug_metadata("Volume group name found in metadata on %s at %" PRIu64 " does "
"not match expected name %s.",
dev_name(dev_area->dev), dev_area->start + rlocn->offset, vgname);
bad:
if ((info = lvmcache_info_from_pvid(dev_area->dev->pvid, dev_area->dev, 0)) &&
!lvmcache_update_vgname_and_id(info, &vgsummary_orphan))
stack;
log_error("Volume group name found in metadata on %s at %llu does not match expected name %s.",
dev_name(dev_area->dev), (unsigned long long)dev_area->start + rlocn->offset, vgname);
*failed_flags |= FAILED_VG_METADATA_FIELD;
return NULL;
}
@@ -484,14 +520,15 @@ static int _raw_holds_vgname(struct format_instance *fid,
int r = 0;
int noprecommit = 0;
struct mda_header *mdah;
uint64_t failed_flags = 0;
if (!dev_open_readonly(dev_area->dev))
return_0;
if (!(mdah = raw_read_mda_header(fid->fmt, dev_area)))
if (!(mdah = raw_read_mda_header(fid->fmt, dev_area, NULL, &failed_flags)))
return_0;
if (_find_vg_rlocn(dev_area, mdah, vgname, &noprecommit))
if (_read_metadata_location_vg(dev_area, mdah, NULL, vgname, &noprecommit, &failed_flags))
r = 1;
if (!dev_close(dev_area->dev))
@@ -500,13 +537,20 @@ static int _raw_holds_vgname(struct format_instance *fid,
return r;
}
static struct volume_group *_vg_read_raw_area(struct format_instance *fid,
/*
* FIXME: unify with the very similar _read_mda_header_and_metadata_summary()
* that is used during the scanning phase.
*/
static struct volume_group *_read_mda_header_and_metadata_vg(struct format_instance *fid,
const char *vgname,
struct metadata_area *mda,
struct device_area *area,
struct cached_vg_fmtdata **vg_fmtdata,
unsigned *use_previous_vg,
int precommitted,
int single_device)
struct label_read_data *ld,
uint32_t last_meta_checksum,
size_t last_meta_size,
unsigned *last_meta_matches,
int precommitted)
{
struct volume_group *vg = NULL;
struct raw_locn *rlocn;
@@ -514,12 +558,36 @@ static struct volume_group *_vg_read_raw_area(struct format_instance *fid,
time_t when;
char *desc;
uint32_t wrap = 0;
uint64_t failed_flags = 0;
if (!(mdah = raw_read_mda_header(fid->fmt, area)))
if (!(mdah = raw_read_mda_header(fid->fmt, area, ld, &failed_flags))) {
log_debug_metadata("MDA header on %s at %"PRIu64" is not valid.",
dev_name(area->dev), area->start);
if (mda)
mda->read_failed_flags |= failed_flags;
goto_out;
}
if (!(rlocn = _find_vg_rlocn(area, mdah, vgname, &precommitted))) {
log_debug_metadata("VG %s not found on %s", vgname, dev_name(area->dev));
/* pass back to vg_read() */
if (mda)
mda->header_start = mdah->start;
/*
* N.B. in the label scan path:
* read_mda_header_and_metadata_summary() calls only
* read_metadata_location_summary() which calls
* text_read_metadata_summary()
*
* but in the vg_read path:
* read_mda_header_and_metadata_vg() calls both
* read_metadata_location_vg() and
* text_read_metadata_vg()
*/
if (!(rlocn = _read_metadata_location_vg(area, mdah, ld, vgname, &precommitted, &failed_flags))) {
log_debug_metadata("Metadata location on %s returned no location.", dev_name(area->dev));
if (mda)
mda->read_failed_flags |= failed_flags;
goto out;
}
@@ -527,30 +595,48 @@ static struct volume_group *_vg_read_raw_area(struct format_instance *fid,
wrap = (uint32_t) ((rlocn->offset + rlocn->size) - mdah->size);
if (wrap > rlocn->offset) {
log_error("VG %s metadata too large for circular buffer",
vgname);
log_error("Metadata location on %s at %"PRIu64" is too large for circular buffer.",
dev_name(area->dev), area->start + rlocn->offset);
if (mda)
mda->read_failed_flags |= FAILED_VG_METADATA_SIZE;
goto out;
}
/* FIXME 64-bit */
if (!(vg = text_vg_import_fd(fid, NULL, vg_fmtdata, use_previous_vg, single_device, area->dev,
(off_t) (area->start + rlocn->offset),
(uint32_t) (rlocn->size - wrap),
(off_t) (area->start + MDA_HEADER_SIZE),
wrap, calc_crc, rlocn->checksum, &when,
&desc)) && (!use_previous_vg || !*use_previous_vg))
goto_out;
/* pass back to vg_read() */
mda->vg_read_meta_checksum = rlocn->checksum;
mda->vg_read_meta_size = rlocn->size;
if (vg)
log_debug_metadata("Read %s %smetadata (%u) from %s at %" PRIu64 " size %"
PRIu64, vg->name, precommitted ? "pre-commit " : "",
vg->seqno, dev_name(area->dev),
area->start + rlocn->offset, rlocn->size);
else
log_debug_metadata("Skipped reading %smetadata from %s at %" PRIu64 " size %"
PRIu64 " with matching checksum.", precommitted ? "pre-commit " : "",
dev_name(area->dev),
area->start + rlocn->offset, rlocn->size);
vg = text_read_metadata_vg(fid, area->dev, NULL, ld,
(off_t) (area->start + rlocn->offset),
(uint32_t) (rlocn->size - wrap),
(off_t) (area->start + MDA_HEADER_SIZE),
wrap,
last_meta_checksum,
last_meta_size,
last_meta_matches,
calc_crc,
rlocn->checksum,
&when, &desc, &failed_flags);
if (!vg) {
/*
* FIXME: success/failure from this function is terribly defined.
* NULL vg also means that the checksum optimization has been used.
*/
if (failed_flags) {
log_error("Metadata location on %s at %"PRIu64" has invalid metadata for VG.",
dev_name(area->dev), area->start + rlocn->offset);
if (mda)
mda->read_failed_flags |= failed_flags;
}
}
log_debug_metadata("Metadata location on %s at %"PRIu64" size %"PRIu64" has VG %s",
dev_name(area->dev),
area->start + rlocn->offset,
rlocn->size,
vg ? vg->name : "");
if (vg && precommitted)
vg->status |= PRECOMMITTED;
@@ -559,20 +645,93 @@ static struct volume_group *_vg_read_raw_area(struct format_instance *fid,
return vg;
}
static void _set_pv_device(struct format_instance *fid,
struct volume_group *vg,
struct physical_volume *pv)
{
uint64_t size;
/*
* Convert the uuid into a device.
*/
if (fid->fmt->cmd && !fid->fmt->cmd->pvscan_cache_single) {
if (!(pv->dev = lvmcache_device_from_pvid(fid->fmt->cmd, &pv->id, &pv->label_sector))) {
char buffer[64] __attribute__((aligned(8)));
if (!id_write_format(&pv->id, buffer, sizeof(buffer)))
buffer[0] = '\0';
log_error_once("Couldn't find device with uuid %s.", buffer);
}
} else {
log_debug_metadata("Skip metadata pvid to device lookup for lvmetad pvscan.");
}
/* TODO is the !lvmetad_used() too coarse here? */
if (!pv->dev && !lvmetad_used())
pv->status |= MISSING_PV;
if ((pv->status & MISSING_PV) && pv->dev && pv_mda_used_count(pv) == 0) {
pv->status &= ~MISSING_PV;
log_info("Recovering a previously MISSING PV %s with no MDAs.",
pv_dev_name(pv));
}
/* Fix up pv size if missing or impossibly large */
if ((!pv->size || pv->size > (1ULL << 62)) && pv->dev) {
if (!dev_get_size(pv->dev, &pv->size)) {
log_error("%s: Couldn't get size.", pv_dev_name(pv));
return;
}
log_verbose("Fixing up missing size (%s) "
"for PV %s", display_size(fid->fmt->cmd, pv->size),
pv_dev_name(pv));
size = pv->pe_count * (uint64_t) vg->extent_size + pv->pe_start;
if (size > pv->size)
log_warn("WARNING: Physical Volume %s is too large "
"for underlying device", pv_dev_name(pv));
}
}
/*
* Finds the 'struct device' that corresponds to each PV in the metadata,
* and may make some adjustments to vg fields based on the dev properties.
*
* FIXME: we shouldn't call this for each mda. It should be done
* once by the layer above this on the final vg.
*/
void set_pv_devices(struct format_instance *fid, struct volume_group *vg)
{
struct pv_list *pvl;
dm_list_iterate_items(pvl, &vg->pvs)
_set_pv_device(fid, vg, pvl->pv);
dm_list_iterate_items(pvl, &vg->pvs_outdated)
_set_pv_device(fid, vg, pvl->pv);
}
static struct volume_group *_vg_read_raw(struct format_instance *fid,
const char *vgname,
struct metadata_area *mda,
struct cached_vg_fmtdata **vg_fmtdata,
unsigned *use_previous_vg,
int single_device)
struct label_read_data *ld,
uint32_t last_meta_checksum,
size_t last_meta_size,
unsigned *last_meta_matches)
{
struct mda_context *mdac = (struct mda_context *) mda->metadata_locn;
struct volume_group *vg;
if (!dev_open_readonly(mdac->area.dev))
if (!dev_open_readonly(mdac->area.dev)) {
mda->read_failed_flags |= FAILED_INTERNAL;
return_NULL;
}
vg = _vg_read_raw_area(fid, vgname, &mdac->area, vg_fmtdata, use_previous_vg, 0, single_device);
vg = _read_mda_header_and_metadata_vg(fid, vgname, mda, &mdac->area, ld,
last_meta_checksum, last_meta_size, last_meta_matches, 0);
/* FIXME: move this into vg_read() */
if (vg)
set_pv_devices(fid, vg);
if (!dev_close(mdac->area.dev))
stack;
@@ -583,16 +742,25 @@ static struct volume_group *_vg_read_raw(struct format_instance *fid,
static struct volume_group *_vg_read_precommit_raw(struct format_instance *fid,
const char *vgname,
struct metadata_area *mda,
struct cached_vg_fmtdata **vg_fmtdata,
unsigned *use_previous_vg)
struct label_read_data *ld,
uint32_t last_meta_checksum,
size_t last_meta_size,
unsigned *last_meta_matches)
{
struct mda_context *mdac = (struct mda_context *) mda->metadata_locn;
struct volume_group *vg;
if (!dev_open_readonly(mdac->area.dev))
if (!dev_open_readonly(mdac->area.dev)) {
mda->read_failed_flags |= FAILED_INTERNAL;
return_NULL;
}
vg = _vg_read_raw_area(fid, vgname, &mdac->area, vg_fmtdata, use_previous_vg, 1, 0);
vg = _read_mda_header_and_metadata_vg(fid, vgname, mda, &mdac->area, ld,
last_meta_checksum, last_meta_size, last_meta_matches, 1);
/* FIXME: move this into vg_read() */
if (vg)
set_pv_devices(fid, vg);
if (!dev_close(mdac->area.dev))
stack;
@@ -613,6 +781,7 @@ static int _vg_write_raw(struct format_instance *fid, struct volume_group *vg,
int found = 0;
int noprecommit = 0;
const char *old_vg_name = NULL;
uint64_t failed_flags = 0;
/* Ignore any mda on a PV outside the VG. vgsplit relies on this */
dm_list_iterate_items(pvl, &vg->pvs) {
@@ -630,10 +799,10 @@ static int _vg_write_raw(struct format_instance *fid, struct volume_group *vg,
if (!dev_open(mdac->area.dev))
return_0;
if (!(mdah = raw_read_mda_header(fid->fmt, &mdac->area)))
if (!(mdah = raw_read_mda_header(fid->fmt, &mdac->area, NULL, &failed_flags)))
goto_out;
rlocn = _find_vg_rlocn(&mdac->area, mdah, old_vg_name ? : vg->name, &noprecommit);
rlocn = _read_metadata_location_vg(&mdac->area, mdah, NULL, old_vg_name ? : vg->name, &noprecommit, &failed_flags);
mdac->rlocn.offset = _next_rlocn_offset(rlocn, mdah);
if (!fidtc->raw_metadata_buf &&
@@ -722,6 +891,7 @@ static int _vg_commit_raw_rlocn(struct format_instance *fid,
int found = 0;
int noprecommit = 0;
const char *old_vg_name = NULL;
uint64_t failed_flags = 0;
/* Ignore any mda on a PV outside the VG. vgsplit relies on this */
dm_list_iterate_items(pvl, &vg->pvs) {
@@ -736,10 +906,10 @@ static int _vg_commit_raw_rlocn(struct format_instance *fid,
if (!found)
return 1;
if (!(mdah = raw_read_mda_header(fid->fmt, &mdac->area)))
if (!(mdah = raw_read_mda_header(fid->fmt, &mdac->area, NULL, &failed_flags)))
goto_out;
if (!(rlocn = _find_vg_rlocn(&mdac->area, mdah, old_vg_name ? : vg->name, &noprecommit))) {
if (!(rlocn = _read_metadata_location_vg(&mdac->area, mdah, NULL, old_vg_name ? : vg->name, &noprecommit, &failed_flags))) {
mdah->raw_locns[0].offset = 0;
mdah->raw_locns[0].size = 0;
mdah->raw_locns[0].checksum = 0;
@@ -842,14 +1012,15 @@ static int _vg_remove_raw(struct format_instance *fid, struct volume_group *vg,
struct raw_locn *rlocn;
int r = 0;
int noprecommit = 0;
uint64_t failed_flags = 0;
if (!dev_open(mdac->area.dev))
return_0;
if (!(mdah = raw_read_mda_header(fid->fmt, &mdac->area)))
if (!(mdah = raw_read_mda_header(fid->fmt, &mdac->area, NULL, &failed_flags)))
goto_out;
if (!(rlocn = _find_vg_rlocn(&mdac->area, mdah, vg->name, &noprecommit))) {
if (!(rlocn = _read_metadata_location_vg(&mdac->area, mdah, NULL, vg->name, &noprecommit, &failed_flags))) {
rlocn = &mdah->raw_locns[0];
mdah->raw_locns[1].offset = 0;
}
@@ -883,8 +1054,10 @@ static struct volume_group *_vg_read_file_name(struct format_instance *fid,
time_t when;
char *desc;
if (!(vg = text_vg_import_file(fid, read_path, &when, &desc)))
return_NULL;
if (!(vg = text_read_metadata_file(fid, read_path, &when, &desc))) {
log_error("Failed to read VG %s from %s", vgname, read_path);
return NULL;
}
/*
* Currently you can only have a single volume group per
@@ -907,9 +1080,10 @@ static struct volume_group *_vg_read_file_name(struct format_instance *fid,
static struct volume_group *_vg_read_file(struct format_instance *fid,
const char *vgname,
struct metadata_area *mda,
struct cached_vg_fmtdata **vg_fmtdata,
unsigned *use_previous_vg __attribute__((unused)),
int single_device __attribute__((unused)))
struct label_read_data *ld,
uint32_t last_meta_checksum,
size_t last_meta_size,
unsigned *last_meta_matches)
{
struct text_context *tc = (struct text_context *) mda->metadata_locn;
@@ -919,8 +1093,10 @@ static struct volume_group *_vg_read_file(struct format_instance *fid,
static struct volume_group *_vg_read_precommit_file(struct format_instance *fid,
const char *vgname,
struct metadata_area *mda,
struct cached_vg_fmtdata **vg_fmtdata,
unsigned *use_previous_vg __attribute__((unused)))
struct label_read_data *ld,
uint32_t last_meta_checksum,
size_t last_meta_size,
unsigned *last_meta_matches)
{
struct text_context *tc = (struct text_context *) mda->metadata_locn;
struct volume_group *vg;
@@ -1092,6 +1268,8 @@ static int _vg_remove_file(struct format_instance *fid __attribute__((unused)),
return 1;
}
/* used for independent_metadata_areas */
static int _scan_file(const struct format_type *fmt, const char *vgname)
{
struct dirent *dirent;
@@ -1107,6 +1285,9 @@ static int _scan_file(const struct format_type *fmt, const char *vgname)
dir_list = &((struct mda_lists *) fmt->private)->dirs;
if (!dm_list_empty(dir_list))
log_debug_metadata("Scanning independent files for %s", vgname ? vgname : "VGs");
dm_list_iterate_items(dl, dir_list) {
if (!(d = opendir(dl->dir))) {
log_sys_error("opendir", dl->dir);
@@ -1139,10 +1320,14 @@ static int _scan_file(const struct format_type *fmt, const char *vgname)
stack;
break;
}
log_debug_metadata("Scanning independent file %s for VG %s", path, scanned_vgname);
if ((vg = _vg_read_file_name(fid, scanned_vgname,
path))) {
/* FIXME Store creation host in vg */
lvmcache_update_vg(vg, 0);
lvmcache_set_independent_location(vg->name);
release_vg(vg);
}
}
@@ -1154,22 +1339,26 @@ static int _scan_file(const struct format_type *fmt, const char *vgname)
return 1;
}
int vgname_from_mda(const struct format_type *fmt,
struct mda_header *mdah, struct device_area *dev_area,
struct lvmcache_vgsummary *vgsummary, uint64_t *mda_free_sectors)
int read_metadata_location_summary(const struct format_type *fmt,
struct mda_header *mdah,
struct label_read_data *ld,
struct device_area *dev_area,
struct lvmcache_vgsummary *vgsummary,
uint64_t *mda_free_sectors,
uint64_t *failed_flags)
{
struct raw_locn *rlocn;
uint32_t wrap = 0;
unsigned int len = 0;
char buf[NAME_LEN + 1] __attribute__((aligned(8)));
uint64_t buffer_size, current_usage;
unsigned used_cached_metadata = 0;
if (mda_free_sectors)
*mda_free_sectors = ((dev_area->size - MDA_HEADER_SIZE) / 2) >> SECTOR_SHIFT;
if (!mdah) {
log_error(INTERNAL_ERROR "vgname_from_mda called with NULL pointer for mda_header");
log_error(INTERNAL_ERROR "read_metadata_location called with NULL pointer for mda_header");
*failed_flags |= FAILED_INTERNAL;
return 0;
}
@@ -1180,15 +1369,24 @@ int vgname_from_mda(const struct format_type *fmt,
* If no valid offset, do not try to search for vgname
*/
if (!rlocn->offset) {
log_debug("%s: found metadata with offset 0.",
dev_name(dev_area->dev));
log_error("Metadata location on %s at %"PRIu64" has offset 0.",
dev_name(dev_area->dev), dev_area->start + rlocn->offset);
*failed_flags |= FAILED_MDA_HEADER_RLOCN;
return 0;
}
/* Do quick check for a vgname */
if (!dev_read(dev_area->dev, dev_area->start + rlocn->offset,
NAME_LEN, buf))
return_0;
/*
* Verify that the VG metadata pointed to by the rlocn
* begins with a valid vgname.
*/
if (!ld || (ld->buf_len < dev_area->start + rlocn->offset + NAME_LEN)) {
if (!dev_read(dev_area->dev, dev_area->start + rlocn->offset, NAME_LEN, buf)) {
*failed_flags |= FAILED_VG_METADATA_IO;
return_0;
}
} else {
memcpy(buf, ld->buf + dev_area->start + rlocn->offset, NAME_LEN);
}
while (buf[len] && !isspace(buf[len]) && buf[len] != '{' &&
len < (NAME_LEN - 1))
@@ -1197,47 +1395,51 @@ int vgname_from_mda(const struct format_type *fmt,
buf[len] = '\0';
/* Ignore this entry if the characters aren't permissible */
if (!validate_name(buf))
if (!validate_name(buf)) {
log_error("Metadata location on %s at %"PRIu64" begins with invalid VG name.",
dev_name(dev_area->dev), dev_area->start + rlocn->offset);
*failed_flags |= FAILED_VG_METADATA_FIELD;
return_0;
}
/* We found a VG - now check the metadata */
if (rlocn->offset + rlocn->size > mdah->size)
wrap = (uint32_t) ((rlocn->offset + rlocn->size) - mdah->size);
if (wrap > rlocn->offset) {
log_error("%s: metadata too large for circular buffer",
dev_name(dev_area->dev));
log_error("Metadata location on %s at %"PRIu64" is too large for circular buffer.",
dev_name(dev_area->dev), dev_area->start + rlocn->offset);
*failed_flags |= FAILED_VG_METADATA_SIZE;
return 0;
}
/* Did we see this metadata before? */
vgsummary->mda_checksum = rlocn->checksum;
vgsummary->mda_size = rlocn->size;
if (lvmcache_lookup_mda(vgsummary))
used_cached_metadata = 1;
/* FIXME 64-bit */
if (!text_vgsummary_import(fmt, dev_area->dev,
if (!text_read_metadata_summary(fmt, dev_area->dev, ld,
(off_t) (dev_area->start + rlocn->offset),
(uint32_t) (rlocn->size - wrap),
(off_t) (dev_area->start + MDA_HEADER_SIZE),
wrap, calc_crc, vgsummary->vgname ? 1 : 0,
vgsummary))
vgsummary, failed_flags)) {
log_error("Metadata location on %s at %"PRIu64" has invalid summary for VG.",
dev_name(dev_area->dev), dev_area->start + rlocn->offset);
return_0;
}
/* Ignore this entry if the characters aren't permissible */
if (!validate_name(vgsummary->vgname))
if (!validate_name(vgsummary->vgname)) {
log_error("Metadata location on %s at %"PRIu64" has invalid VG name.",
dev_name(dev_area->dev), dev_area->start + rlocn->offset);
*failed_flags |= FAILED_VG_METADATA_FIELD;
return_0;
}
log_debug_metadata("%s: %s metadata at %" PRIu64 " size %" PRIu64
" (in area at %" PRIu64 " size %" PRIu64
") for %s (" FMTVGID ")",
log_debug_metadata("Metadata location on %s at %"PRIu64" size %"PRIu64" has summary for VG %s",
dev_name(dev_area->dev),
used_cached_metadata ? "Using cached" : "Found",
dev_area->start + rlocn->offset,
rlocn->size, dev_area->start, dev_area->size, vgsummary->vgname,
(char *)&vgsummary->vgid);
rlocn->size,
vgsummary->vgname);
if (mda_free_sectors) {
current_usage = (rlocn->size + SECTOR_SIZE - UINT64_C(1)) -
@@ -1253,6 +1455,8 @@ int vgname_from_mda(const struct format_type *fmt,
return 1;
}
/* used for independent_metadata_areas */
static int _scan_raw(const struct format_type *fmt, const char *vgname __attribute__((unused)))
{
struct raw_list *rl;
@@ -1261,30 +1465,38 @@ static int _scan_raw(const struct format_type *fmt, const char *vgname __attribu
struct format_instance fid;
struct lvmcache_vgsummary vgsummary = { 0 };
struct mda_header *mdah;
uint64_t failed_flags = 0;
raw_list = &((struct mda_lists *) fmt->private)->raws;
if (!dm_list_empty(raw_list))
log_debug_metadata("Scanning independent raw locations for %s", vgname ? vgname : "VGs");
fid.fmt = fmt;
dm_list_init(&fid.metadata_areas_in_use);
dm_list_init(&fid.metadata_areas_ignored);
dm_list_iterate_items(rl, raw_list) {
log_debug_metadata("Scanning independent dev %s", dev_name(rl->dev_area.dev));
/* FIXME We're reading mdah twice here... */
if (!dev_open_readonly(rl->dev_area.dev)) {
stack;
continue;
}
if (!(mdah = raw_read_mda_header(fmt, &rl->dev_area))) {
if (!(mdah = raw_read_mda_header(fmt, &rl->dev_area, NULL, &failed_flags))) {
stack;
goto close_dev;
}
/* TODO: caching as in vgname_from_mda() (trigger this code?) */
if (vgname_from_mda(fmt, mdah, &rl->dev_area, &vgsummary, NULL)) {
vg = _vg_read_raw_area(&fid, vgsummary.vgname, &rl->dev_area, NULL, NULL, 0, 0);
if (vg)
/* TODO: caching as in read_metadata_location_summary() (trigger this code?) */
if (read_metadata_location_summary(fmt, mdah, NULL, &rl->dev_area, &vgsummary, NULL, &failed_flags)) {
vg = _read_mda_header_and_metadata_vg(&fid, vgsummary.vgname, NULL, &rl->dev_area, NULL, 0, 0, NULL, 0);
if (vg) {
lvmcache_update_vg(vg, 0);
lvmcache_set_independent_location(vg->name);
}
}
close_dev:
if (!dev_close(rl->dev_area.dev))
@@ -1294,9 +1506,13 @@ static int _scan_raw(const struct format_type *fmt, const char *vgname __attribu
return 1;
}
/* used for independent_metadata_areas */
static int _text_scan(const struct format_type *fmt, const char *vgname)
{
return (_scan_file(fmt, vgname) & _scan_raw(fmt, vgname));
_scan_file(fmt, vgname);
_scan_raw(fmt, vgname);
return 1;
}
struct _write_single_mda_baton {
@@ -1748,15 +1964,18 @@ static struct metadata_area_ops _metadata_text_raw_ops = {
.mda_import_text = _mda_import_text_raw
};
/* used only for sending info to lvmetad */
static int _mda_export_text_raw(struct metadata_area *mda,
struct dm_config_tree *cft,
struct dm_config_node *parent)
{
struct mda_context *mdc = (struct mda_context *) mda->metadata_locn;
char mdah[MDA_HEADER_SIZE]; /* temporary */
if (!mdc || !_raw_read_mda_header((struct mda_header *)mdah, &mdc->area))
if (!mdc) {
log_error(INTERNAL_ERROR "mda_export_text_raw no mdc");
return 1; /* pretend the MDA does not exist */
}
return config_make_nodes(cft, parent, NULL,
"ignore = %" PRId64, (int64_t) mda_is_ignored(mda),
@@ -1766,6 +1985,8 @@ static int _mda_export_text_raw(struct metadata_area *mda,
NULL) ? 1 : 0;
}
/* used only for receiving info from lvmetad */
static int _mda_import_text_raw(struct lvmcache_info *info, const struct dm_config_node *cn)
{
struct device *device;
@@ -1995,22 +2216,6 @@ static int _create_vg_text_instance(struct format_instance *fid,
}
if (type & FMT_INSTANCE_MDAS) {
/*
* TODO in theory, this function should be never reached
* while in critical_section(), because lvmcache's
* cached_vg should be valid. However, this assumption
* sometimes fails (possibly due to inconsistent
* (precommit) metadata and/or missing devices), and
* calling lvmcache_label_scan inside the critical
* section may be fatal (i.e. deadlock).
*/
if (!critical_section())
/* Scan PVs in VG for any further MDAs */
/*
* FIXME Only scan PVs believed to be in the VG.
*/
lvmcache_label_scan(fid->fmt->cmd);
if (!(vginfo = lvmcache_vginfo_from_vgname(vg_name, vg_id)))
goto_out;
if (!lvmcache_fid_add_mdas_vg(vginfo, fid))
@@ -2480,7 +2685,7 @@ static int _get_config_disk_area(struct cmd_context *cmd,
return 0;
}
if (!(dev_area.dev = lvmcache_device_from_pvid(cmd, &id, NULL, NULL))) {
if (!(dev_area.dev = lvmcache_device_from_pvid(cmd, &id, NULL))) {
char buffer[64] __attribute__((aligned(8)));
if (!id_write_format(&id, buffer, sizeof(buffer)))

View File

@@ -49,13 +49,14 @@ struct text_vg_version_ops {
int (*check_version) (const struct dm_config_tree * cf);
struct volume_group *(*read_vg) (struct format_instance * fid,
const struct dm_config_tree *cf,
unsigned use_cached_pvs,
unsigned allow_lvmetad_extensions);
unsigned allow_lvmetad_extensions,
uint64_t *failed_flags);
void (*read_desc) (struct dm_pool * mem, const struct dm_config_tree *cf,
time_t *when, char **desc);
int (*read_vgsummary) (const struct format_type *fmt,
const struct dm_config_tree *cft,
struct lvmcache_vgsummary *vgsummary);
struct lvmcache_vgsummary *vgsummary,
uint64_t *failed_flags);
};
struct text_vg_version_ops *text_vg_vsn1_init(void);
@@ -68,27 +69,36 @@ int read_segtype_lvflags(uint64_t *status, char *segtype_str);
int text_vg_export_file(struct volume_group *vg, const char *desc, FILE *fp);
size_t text_vg_export_raw(struct volume_group *vg, const char *desc, char **buf);
struct volume_group *text_vg_import_file(struct format_instance *fid,
struct volume_group *text_read_metadata_file(struct format_instance *fid,
const char *file,
time_t *when, char **desc);
struct volume_group *text_vg_import_fd(struct format_instance *fid,
const char *file,
struct cached_vg_fmtdata **vg_fmtdata,
unsigned *use_previous_vg,
int single_device,
struct device *dev,
off_t offset, uint32_t size,
off_t offset2, uint32_t size2,
checksum_fn_t checksum_fn,
uint32_t checksum,
time_t *when, char **desc);
int text_vgsummary_import(const struct format_type *fmt,
/* Called in the vg_read path to return the full VG. */
struct volume_group *text_read_metadata_vg(struct format_instance *fid,
struct device *dev,
const char *file,
struct label_read_data *ld,
off_t offset, uint32_t size,
off_t offset2, uint32_t size2,
uint32_t last_meta_checksum,
size_t last_meta_size,
unsigned *last_meta_matches,
checksum_fn_t checksum_fn,
uint32_t checksum,
time_t *when, char **desc,
uint64_t *failed_flags);
/* Called in the label_scan path to return a partial VG summary. */
int text_read_metadata_summary(const struct format_type *fmt,
struct device *dev,
struct label_read_data *ld,
off_t offset, uint32_t size,
off_t offset2, uint32_t size2,
checksum_fn_t checksum_fn,
int checksum_only,
struct lvmcache_vgsummary *vgsummary);
struct lvmcache_vgsummary *vgsummary,
uint64_t *failed_flags);
void set_pv_devices(struct format_instance *fid, struct volume_group *vg);
#endif
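A NULL return from text_read_metadata_vg() can mean either a read/parse failure or that parsing was skipped because the checksum matched the previous metadata; a caller sketch (all variables here are placeholders, not code from the patch) would disambiguate roughly like this:

        unsigned matches = 0;
        uint64_t failed_flags = 0;
        struct volume_group *vg;

        vg = text_read_metadata_vg(fid, dev, NULL, ld,
                                   offset, size, offset2, size2,
                                   prev_checksum, prev_size, &matches,
                                   calc_crc, rlocn_checksum,
                                   &when, &desc, &failed_flags);
        if (vg) {
                /* metadata was read and parsed into a new VG */
        } else if (matches) {
                /* checksum and size matched prev_checksum/prev_size, so
                 * parsing was skipped; reuse the previously built VG */
        } else {
                /* genuine failure; failed_flags records the reason */
        }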

View File

@@ -32,37 +32,70 @@ static void _init_text_import(void)
_text_import_initialised = 1;
}
/*
* Read a summary of the VG metadata (vgname, vgid, etc.) on a given device.
*/
int text_vgsummary_import(const struct format_type *fmt,
int text_read_metadata_summary(const struct format_type *fmt,
struct device *dev,
struct label_read_data *ld,
off_t offset, uint32_t size,
off_t offset2, uint32_t size2,
checksum_fn_t checksum_fn,
int checksum_only,
struct lvmcache_vgsummary *vgsummary)
struct lvmcache_vgsummary *vgsummary,
uint64_t *failed_flags)
{
struct dm_config_tree *cft;
struct text_vg_version_ops **vsn;
char *buf = NULL;
int r = 0;
if (ld) {
if (ld->buf_len >= (offset + size))
buf = ld->buf;
else {
/*
* Needs data beyond the end of the ld buffer.
* Will do a new synchronous read to get the data.
* (scan_size could also be made larger.)
*/
log_debug_metadata("label scan buffer for %s too small %u for metadata offset %llu size %u",
dev_name(dev), ld->buf_len, (unsigned long long)offset, size);
buf = NULL;
}
}
_init_text_import();
if (!(cft = config_open(CONFIG_FILE_SPECIAL, NULL, 0)))
if (!(cft = config_open(CONFIG_FILE_SPECIAL, NULL, 0))) {
*failed_flags |= FAILED_INTERNAL;
return_0;
}
if ((!dev && !config_file_read(cft)) ||
(dev && !config_file_read_fd(cft, dev, offset, size,
if (dev) {
if (buf)
log_debug_metadata("Copying metadata summary for %s at %llu size %d (+%d)",
dev_name(dev), (unsigned long long)offset,
size, size2);
else
log_debug_metadata("Reading metadata summary from %s at %llu size %d (+%d)",
dev_name(dev), (unsigned long long)offset,
size, size2);
if (!config_file_read_fd(cft, dev, buf, offset, size,
offset2, size2, checksum_fn,
vgsummary->mda_checksum,
checksum_only, 1))) {
log_error("Couldn't read volume group metadata.");
goto out;
checksum_only, 1, failed_flags)) {
log_error("Couldn't read volume group metadata from %s at %llu.", dev_name(dev), (unsigned long long)offset);
goto out;
}
} else {
if (!config_file_read(cft)) {
log_error("Couldn't read volume group metadata from file.");
goto out;
}
}
if (checksum_only) {
/* Checksum matches already-cached content - no need to reparse. */
log_debug_metadata("Metadata summary checksum matches previous for %s.", dev ? dev_name(dev) : "file");
r = 1;
goto out;
}
@@ -74,8 +107,10 @@ int text_vgsummary_import(const struct format_type *fmt,
if (!(*vsn)->check_version(cft))
continue;
if (!(*vsn)->read_vgsummary(fmt, cft, vgsummary))
if (!(*vsn)->read_vgsummary(fmt, cft, vgsummary, failed_flags)) {
log_debug_metadata("Metadata summary is invalid for %s.", dev ? dev_name(dev) : "file");
goto_out;
}
r = 1;
break;
@@ -86,56 +121,94 @@ int text_vgsummary_import(const struct format_type *fmt,
return r;
}
struct cached_vg_fmtdata {
uint32_t cached_mda_checksum;
size_t cached_mda_size;
};
struct volume_group *text_vg_import_fd(struct format_instance *fid,
const char *file,
struct cached_vg_fmtdata **vg_fmtdata,
unsigned *use_previous_vg,
int single_device,
struct volume_group *text_read_metadata_vg(struct format_instance *fid,
struct device *dev,
const char *file,
struct label_read_data *ld,
off_t offset, uint32_t size,
off_t offset2, uint32_t size2,
uint32_t last_meta_checksum,
size_t last_meta_size,
unsigned *last_meta_matches,
checksum_fn_t checksum_fn,
uint32_t checksum,
time_t *when, char **desc)
time_t *when, char **desc,
uint64_t *failed_flags)
{
struct volume_group *vg = NULL;
struct dm_config_tree *cft;
struct text_vg_version_ops **vsn;
int skip_parse;
if (vg_fmtdata && !*vg_fmtdata &&
!(*vg_fmtdata = dm_pool_zalloc(fid->mem, sizeof(**vg_fmtdata)))) {
log_error("Failed to allocate VG fmtdata for text format.");
return NULL;
}
char *buf = NULL;
unsigned last_matches;
_init_text_import();
*desc = NULL;
*when = 0;
if (!(cft = config_open(CONFIG_FILE_SPECIAL, file, 0)))
if (!(cft = config_open(CONFIG_FILE_SPECIAL, file, 0))) {
*failed_flags |= FAILED_INTERNAL;
return_NULL;
}
/* Does the metadata match the already-cached VG? */
skip_parse = vg_fmtdata &&
((*vg_fmtdata)->cached_mda_checksum == checksum) &&
((*vg_fmtdata)->cached_mda_size == (size + size2));
if (last_meta_checksum && last_meta_size &&
(checksum == last_meta_checksum) && ((size + size2) == last_meta_size))
last_matches = 1;
else
last_matches = 0;
if ((!dev && !config_file_read(cft)) ||
(dev && !config_file_read_fd(cft, dev, offset, size,
if (last_meta_matches)
*last_meta_matches = last_matches;
if (ld) {
if (ld->buf_len >= (offset + size))
buf = ld->buf;
else {
/*
* Needs data beyond the end of the ld buffer.
* Will do a new synchronous read to get the data.
* (scan_size could also be made larger.)
*/
log_debug_metadata("scan buffer for %s too small %u for metadata offset %llu size %u",
dev_name(dev), ld->buf_len, (unsigned long long)offset, size);
buf = NULL;
}
}
if (dev) {
if (buf)
log_debug_metadata("Copying metadata for %s at %llu size %d (+%d)",
dev_name(dev), (unsigned long long)offset,
size, size2);
else
log_debug_metadata("Reading metadata from %s at %llu size %d (+%d)",
dev_name(dev), (unsigned long long)offset,
size, size2);
if (!config_file_read_fd(cft, dev, buf, offset, size,
offset2, size2, checksum_fn, checksum,
skip_parse, 1)))
goto_out;
last_matches, 1, failed_flags)) {
log_error("Couldn't read volume group metadata from %s.", dev_name(dev));
if (skip_parse) {
if (use_previous_vg)
*use_previous_vg = 1;
/* We have to be certain this has been set since it's the
* only way the caller knows if the function failed or not. */
if (!*failed_flags)
*failed_flags |= FAILED_VG_METADATA;
goto out;
}
} else {
if (!config_file_read(cft)) {
log_error("Couldn't read volume group metadata from file.");
if (!*failed_flags)
*failed_flags |= FAILED_VG_METADATA;
goto out;
}
}
if (last_matches) {
log_debug_metadata("Skipped parsing metadata on %s with matching checksum 0x%x size %zu.",
dev_name(dev), last_meta_checksum, last_meta_size);
goto out;
}
@@ -146,40 +219,40 @@ struct volume_group *text_vg_import_fd(struct format_instance *fid,
if (!(*vsn)->check_version(cft))
continue;
if (!(vg = (*vsn)->read_vg(fid, cft, single_device, 0)))
if (!(vg = (*vsn)->read_vg(fid, cft, 0, failed_flags))) {
if (!*failed_flags)
*failed_flags |= FAILED_VG_METADATA;
goto_out;
}
(*vsn)->read_desc(vg->vgmem, cft, when, desc);
break;
}
if (vg && vg_fmtdata && *vg_fmtdata) {
(*vg_fmtdata)->cached_mda_size = (size + size2);
(*vg_fmtdata)->cached_mda_checksum = checksum;
}
if (use_previous_vg)
*use_previous_vg = 0;
out:
config_destroy(cft);
return vg;
}
struct volume_group *text_vg_import_file(struct format_instance *fid,
struct volume_group *text_read_metadata_file(struct format_instance *fid,
const char *file,
time_t *when, char **desc)
{
return text_vg_import_fd(fid, file, NULL, NULL, 0, NULL, (off_t)0, 0, (off_t)0, 0, NULL, 0,
when, desc);
uint64_t failed_flags = 0;
return text_read_metadata_vg(fid, NULL, file, NULL,
(off_t)0, 0, (off_t)0, 0,
0, 0, NULL, NULL, 0,
when, desc, &failed_flags);
}
static struct volume_group *_import_vg_from_config_tree(const struct dm_config_tree *cft,
struct format_instance *fid,
unsigned allow_lvmetad_extensions)
unsigned for_lvmetad)
{
struct volume_group *vg = NULL;
struct text_vg_version_ops **vsn;
uint64_t failed_flags = 0;
int vg_missing;
_init_text_import();
@@ -191,13 +264,16 @@ static struct volume_group *_import_vg_from_config_tree(const struct dm_config_t
* The only path to this point uses cached vgmetadata,
* so it can use cached PV state too.
*/
if (!(vg = (*vsn)->read_vg(fid, cft, 1, allow_lvmetad_extensions)))
if (!(vg = (*vsn)->read_vg(fid, cft, for_lvmetad, &failed_flags)))
stack;
else if ((vg_missing = vg_missing_pv_count(vg))) {
log_verbose("There are %d physical volumes missing.",
vg_missing);
else {
/* FIXME: move this into vg_read() */
set_pv_devices(fid, vg);
if ((vg_missing = vg_missing_pv_count(vg)))
log_verbose("There are %d physical volumes missing.", vg_missing);
vg_mark_partial_lvs(vg, 1);
/* FIXME: move this code inside read_vg() */
}
break;
}


@@ -32,9 +32,7 @@ typedef int (*section_fn) (struct format_instance * fid,
struct volume_group * vg, const struct dm_config_node * pvn,
const struct dm_config_node * vgn,
struct dm_hash_table * pv_hash,
struct dm_hash_table * lv_hash,
unsigned *scan_done_once,
unsigned report_missing_devices);
struct dm_hash_table * lv_hash);
#define _read_int32(root, path, result) \
dm_config_get_uint32(root, path, (uint32_t *) (result))
@@ -176,13 +174,17 @@ static int _read_str_list(struct dm_pool *mem, struct dm_list *list, const struc
return 1;
}
/*
* FIXME: move the special lvmetad bits out of here and let the
* high level lvmetad-specific code deal with it. This function
* is the wrong place for it.
*/
static int _read_pv(struct format_instance *fid,
struct volume_group *vg, const struct dm_config_node *pvn,
const struct dm_config_node *vgn __attribute__((unused)),
struct dm_hash_table *pv_hash,
struct dm_hash_table *lv_hash __attribute__((unused)),
unsigned *scan_done_once,
unsigned report_missing_devices)
struct dm_hash_table *lv_hash __attribute__((unused)))
{
struct dm_pool *mem = vg->vgmem;
struct physical_volume *pv;
@@ -217,21 +219,6 @@ static int _read_pv(struct format_instance *fid,
pv->is_labelled = 1; /* All format_text PVs are labelled. */
/*
* Convert the uuid into a device.
*/
if (!(pv->dev = lvmcache_device_from_pvid(fid->fmt->cmd, &pv->id, scan_done_once,
&pv->label_sector))) {
char buffer[64] __attribute__((aligned(8)));
if (!id_write_format(&pv->id, buffer, sizeof(buffer)))
buffer[0] = '\0';
if (report_missing_devices)
log_error_once("Couldn't find device with uuid %s.", buffer);
else
log_very_verbose("Couldn't find device with uuid %s.", buffer);
}
if (!(pv->vg_name = dm_pool_strdup(mem, vg->name)))
return_0;
@@ -242,16 +229,6 @@ static int _read_pv(struct format_instance *fid,
return 0;
}
/* TODO is the !lvmetad_used() too coarse here? */
if (!pv->dev && !lvmetad_used())
pv->status |= MISSING_PV;
if ((pv->status & MISSING_PV) && pv->dev && pv_mda_used_count(pv) == 0) {
pv->status &= ~MISSING_PV;
log_info("Recovering a previously MISSING PV %s with no MDAs.",
pv_dev_name(pv));
}
/* Late addition */
if (dm_config_has_node(pvn, "dev_size") &&
!_read_uint64(pvn, "dev_size", &pv->size)) {
@@ -304,21 +281,6 @@ static int _read_pv(struct format_instance *fid,
pv->pe_align = 0;
pv->fmt = fid->fmt;
/* Fix up pv size if missing or impossibly large */
if ((!pv->size || pv->size > (1ULL << 62)) && pv->dev) {
if (!dev_get_size(pv->dev, &pv->size)) {
log_error("%s: Couldn't get size.", pv_dev_name(pv));
return 0;
}
log_verbose("Fixing up missing size (%s) "
"for PV %s", display_size(fid->fmt->cmd, pv->size),
pv_dev_name(pv));
size = pv->pe_count * (uint64_t) vg->extent_size + pv->pe_start;
if (size > pv->size)
log_warn("WARNING: Physical Volume %s is too large "
"for underlying device", pv_dev_name(pv));
}
if (!alloc_pv_segment_whole_pv(mem, pv))
return_0;
@@ -574,9 +536,7 @@ static int _read_lvnames(struct format_instance *fid __attribute__((unused)),
struct volume_group *vg, const struct dm_config_node *lvn,
const struct dm_config_node *vgn __attribute__((unused)),
struct dm_hash_table *pv_hash __attribute__((unused)),
struct dm_hash_table *lv_hash,
unsigned *scan_done_once __attribute__((unused)),
unsigned report_missing_devices __attribute__((unused)))
struct dm_hash_table *lv_hash)
{
struct dm_pool *mem = vg->vgmem;
struct logical_volume *lv;
@@ -731,9 +691,7 @@ static int _read_historical_lvnames(struct format_instance *fid __attribute__((u
struct volume_group *vg, const struct dm_config_node *hlvn,
const struct dm_config_node *vgn __attribute__((unused)),
struct dm_hash_table *pv_hash __attribute__((unused)),
struct dm_hash_table *lv_hash __attribute__((unused)),
unsigned *scan_done_once __attribute__((unused)),
unsigned report_missing_devices __attribute__((unused)))
struct dm_hash_table *lv_hash __attribute__((unused)))
{
struct dm_pool *mem = vg->vgmem;
struct generic_logical_volume *glv;
@@ -802,9 +760,7 @@ static int _read_historical_lvnames_interconnections(struct format_instance *fid
struct volume_group *vg, const struct dm_config_node *hlvn,
const struct dm_config_node *vgn __attribute__((unused)),
struct dm_hash_table *pv_hash __attribute__((unused)),
struct dm_hash_table *lv_hash __attribute__((unused)),
unsigned *scan_done_once __attribute__((unused)),
unsigned report_missing_devices __attribute__((unused)))
struct dm_hash_table *lv_hash __attribute__((unused)))
{
struct dm_pool *mem = vg->vgmem;
const char *historical_lv_name, *origin_name = NULL;
@@ -914,9 +870,7 @@ static int _read_lvsegs(struct format_instance *fid,
struct volume_group *vg, const struct dm_config_node *lvn,
const struct dm_config_node *vgn __attribute__((unused)),
struct dm_hash_table *pv_hash,
struct dm_hash_table *lv_hash,
unsigned *scan_done_once __attribute__((unused)),
unsigned report_missing_devices __attribute__((unused)))
struct dm_hash_table *lv_hash)
{
struct logical_volume *lv;
@@ -972,17 +926,16 @@ static int _read_lvsegs(struct format_instance *fid,
return 1;
}
/* FIXME: add failed_flags so we can return specific errors. */
static int _read_sections(struct format_instance *fid,
const char *section, section_fn fn,
struct volume_group *vg, const struct dm_config_node *vgn,
struct dm_hash_table *pv_hash,
struct dm_hash_table *lv_hash,
int optional,
unsigned *scan_done_once)
int optional)
{
const struct dm_config_node *n;
/* Only report missing devices when doing a scan */
unsigned report_missing_devices = scan_done_once ? !*scan_done_once : 1;
if (!dm_config_get_section(vgn, section, &n)) {
if (!optional) {
@@ -994,25 +947,42 @@ static int _read_sections(struct format_instance *fid,
}
for (n = n->child; n; n = n->sib) {
if (!fn(fid, vg, n, vgn, pv_hash, lv_hash,
scan_done_once, report_missing_devices))
if (!fn(fid, vg, n, vgn, pv_hash, lv_hash))
return_0;
}
return 1;
}
/*
* When for_lvmetad is set, this function will look for
* special sections that lvmetad adds to the metadata,
* which don't actually exist in the real metadata on disk.
*/
/*
* FIXME: this function and everything it calls should be split
* up into two phases. The first should find all the necessary
* fields from the ondisk metadata (in cft), validate them,
* and save them in corresponding vg fields. The second should
* use that data to set up other vg fields that are derived from
* the first. When it's all mixed together, it's hard to pick
* out when there's a problem with the actual text metadata
* vs a problem with lvm setting up the vg struct. Error handling
* for the first would involve fixing the metadata on disk,
* but error handling for the second wouldn't.
*/
static struct volume_group *_read_vg(struct format_instance *fid,
const struct dm_config_tree *cft,
unsigned use_cached_pvs,
unsigned allow_lvmetad_extensions)
unsigned for_lvmetad,
uint64_t *failed_flags)
{
const struct dm_config_node *vgn;
const struct dm_config_value *cv;
const char *str, *format_str, *system_id;
struct volume_group *vg;
struct dm_hash_table *pv_hash = NULL, *lv_hash = NULL;
unsigned scan_done_once = use_cached_pvs;
uint64_t vgstatus;
/* skip any top-level values */
@@ -1025,7 +995,7 @@ static struct volume_group *_read_vg(struct format_instance *fid,
}
if (!(vg = alloc_vg("read_vg", fid->fmt->cmd, vgn->key)))
return_NULL;
goto fail_internal;
/*
* The pv hash memorises the pv section names -> pv
@@ -1033,7 +1003,7 @@ static struct volume_group *_read_vg(struct format_instance *fid,
*/
if (!(pv_hash = dm_hash_create(64))) {
log_error("Couldn't create pv hash table.");
goto bad;
goto fail_internal;
}
/*
@@ -1042,7 +1012,7 @@ static struct volume_group *_read_vg(struct format_instance *fid,
*/
if (!(lv_hash = dm_hash_create(1024))) {
log_error("Couldn't create lv hash table.");
goto bad;
goto fail_internal;
}
vgn = vgn->child;
@@ -1056,7 +1026,7 @@ static struct volume_group *_read_vg(struct format_instance *fid,
if (dm_config_get_str(vgn, "lock_type", &str)) {
if (!(vg->lock_type = dm_pool_strdup(vg->vgmem, str)))
goto bad;
goto fail_internal;
}
/*
@@ -1082,7 +1052,7 @@ static struct volume_group *_read_vg(struct format_instance *fid,
*/
if (dm_config_get_str(vgn, "lock_args", &str)) {
if (!(vg->lock_args = dm_pool_strdup(vg->vgmem, str)))
goto bad;
goto fail_internal;
}
if (!_read_id(&vg->id, vgn, "id")) {
@@ -1108,11 +1078,11 @@ static struct volume_group *_read_vg(struct format_instance *fid,
if (dm_config_get_str(vgn, "system_id", &system_id)) {
if (!(vgstatus & LVM_WRITE_LOCKED)) {
if (!(vg->lvm1_system_id = dm_pool_zalloc(vg->vgmem, NAME_LEN + 1)))
goto_bad;
goto fail_internal;
strncpy(vg->lvm1_system_id, system_id, NAME_LEN);
} else if (!(vg->system_id = dm_pool_strdup(vg->vgmem, system_id))) {
log_error("Failed to allocate memory for system_id in _read_vg.");
goto bad;
goto fail_internal;
}
}
@@ -1167,17 +1137,19 @@ static struct volume_group *_read_vg(struct format_instance *fid,
}
if (!_read_sections(fid, "physical_volumes", _read_pv, vg,
vgn, pv_hash, lv_hash, 0, &scan_done_once)) {
vgn, pv_hash, lv_hash, 0)) {
log_error("Couldn't find all physical volumes for volume "
"group %s.", vg->name);
goto bad;
}
if (allow_lvmetad_extensions)
if (for_lvmetad)
_read_sections(fid, "outdated_pvs", _read_pv, vg,
vgn, pv_hash, lv_hash, 1, &scan_done_once);
else if (dm_config_has_node(vgn, "outdated_pvs"))
vgn, pv_hash, lv_hash, 1);
else if (dm_config_has_node(vgn, "outdated_pvs")) {
log_error(INTERNAL_ERROR "Unexpected outdated_pvs section in metadata of VG %s.", vg->name);
goto fail_internal;
}
/* Optional tags */
if (dm_config_get_list(vgn, "tags", &cv) &&
@@ -1187,28 +1159,28 @@ static struct volume_group *_read_vg(struct format_instance *fid,
}
if (!_read_sections(fid, "logical_volumes", _read_lvnames, vg,
vgn, pv_hash, lv_hash, 1, NULL)) {
vgn, pv_hash, lv_hash, 1)) {
log_error("Couldn't read all logical volume names for volume "
"group %s.", vg->name);
goto bad;
}
if (!_read_sections(fid, "historical_logical_volumes", _read_historical_lvnames, vg,
vgn, pv_hash, lv_hash, 1, NULL)) {
vgn, pv_hash, lv_hash, 1)) {
log_error("Couldn't read all historical logical volumes for volume "
"group %s.", vg->name);
goto bad;
}
if (!_read_sections(fid, "logical_volumes", _read_lvsegs, vg,
vgn, pv_hash, lv_hash, 1, NULL)) {
vgn, pv_hash, lv_hash, 1)) {
log_error("Couldn't read all logical volumes for "
"volume group %s.", vg->name);
goto bad;
}
if (!_read_sections(fid, "historical_logical_volumes", _read_historical_lvnames_interconnections,
vg, vgn, pv_hash, lv_hash, 1, NULL)) {
vg, vgn, pv_hash, lv_hash, 1)) {
log_error("Couldn't read all removed logical volume interconnections "
"for volume group %s.", vg->name);
goto bad;
@@ -1217,7 +1189,7 @@ static struct volume_group *_read_vg(struct format_instance *fid,
if (!fixup_imported_mirrors(vg)) {
log_error("Failed to fixup mirror pointers after import for "
"volume group %s.", vg->name);
goto bad;
goto fail_internal;
}
dm_hash_destroy(pv_hash);
@@ -1225,12 +1197,17 @@ static struct volume_group *_read_vg(struct format_instance *fid,
vg_set_fid(vg, fid);
/*
* Finished.
*/
return vg;
bad:
bad:
/*
* FIXME: there are some internal errors that
* still go through this exit path because functions
* called by this function are not using
* failed_flags yet. For now we are assuming
* every exit through this path is because there
* is bad content in the metadata.
*/
if (pv_hash)
dm_hash_destroy(pv_hash);
@@ -1238,6 +1215,19 @@ static struct volume_group *_read_vg(struct format_instance *fid,
dm_hash_destroy(lv_hash);
release_vg(vg);
*failed_flags |= FAILED_VG_METADATA_FIELD;
return NULL;
fail_internal:
if (pv_hash)
dm_hash_destroy(pv_hash);
if (lv_hash)
dm_hash_destroy(lv_hash);
if (vg)
release_vg(vg);
*failed_flags |= FAILED_INTERNAL;
return NULL;
}
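To make the FIXME above _read_vg concrete, here is a hedged sketch of the two-phase split it describes: a first pass that only copies and validates fields taken from the metadata text, and a second pass that derives in-core state from them, so each kind of failure gets its own flag. All names and the reduced field set are hypothetical; this is not code from this series.

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define EX_FAILED_METADATA_FIELD 0x1ULL   /* stand-ins for the FAILED_* bits */
#define EX_FAILED_INTERNAL       0x2ULL

struct vg_fields {                /* phase 1 output: raw validated fields */
	char name[128];
	uint32_t seqno;
};

struct vg_incore {                /* phase 2 output: derived in-core state */
	char *name;
	uint32_t seqno;
};

/* Phase 1: failures here mean the on-disk metadata text is bad. */
static int parse_fields(const char *name, uint32_t seqno,
			struct vg_fields *f, uint64_t *failed)
{
	if (!name || !name[0] || strlen(name) >= sizeof(f->name)) {
		*failed |= EX_FAILED_METADATA_FIELD;
		return 0;
	}
	strcpy(f->name, name);
	f->seqno = seqno;
	return 1;
}

/* Phase 2: failures here are internal (allocation), not bad metadata. */
static struct vg_incore *build_vg(const struct vg_fields *f, uint64_t *failed)
{
	struct vg_incore *vg;

	if (!(vg = calloc(1, sizeof(*vg))) ||
	    !(vg->name = malloc(strlen(f->name) + 1))) {
		free(vg);
		*failed |= EX_FAILED_INTERNAL;
		return NULL;
	}
	strcpy(vg->name, f->name);
	vg->seqno = f->seqno;
	return vg;
}

The point of the split, per the FIXME, is that a phase 1 failure points at the metadata on disk, while a phase 2 failure points at lvm itself.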
@@ -1266,51 +1256,70 @@ static void _read_desc(struct dm_pool *mem,
* FIXME: why are these separate?
*/
static int _read_vgsummary(const struct format_type *fmt, const struct dm_config_tree *cft,
struct lvmcache_vgsummary *vgsummary)
struct lvmcache_vgsummary *vgsummary, uint64_t *failed_flags)
{
const struct dm_config_node *vgn;
struct dm_pool *mem = fmt->cmd->mem;
const char *str;
uint64_t vgstatus;
if (!dm_config_get_str(cft->root, "creation_host", &str))
str = "";
if (!(vgsummary->creation_host = dm_pool_strdup(mem, str)))
return_0;
goto fail_internal;
/* skip any top-level values */
for (vgn = cft->root; (vgn && vgn->v); vgn = vgn->sib) ;
if (!vgn) {
*failed_flags |= FAILED_VG_METADATA_FIELD;
log_error("Couldn't find volume group in file.");
return 0;
}
if (!(vgsummary->vgname = dm_pool_strdup(mem, vgn->key)))
return_0;
goto fail_internal;
vgn = vgn->child;
if (!_read_id(&vgsummary->vgid, vgn, "id")) {
*failed_flags |= FAILED_VG_METADATA_FIELD;
log_error("Couldn't read uuid for volume group %s.", vgsummary->vgname);
return 0;
}
if (!_read_flag_config(vgn, &vgsummary->vgstatus, VG_FLAGS)) {
if (!_read_flag_config(vgn, &vgstatus, VG_FLAGS)) {
*failed_flags |= FAILED_VG_METADATA_FIELD;
log_error("Couldn't find status flags for volume group %s.",
vgsummary->vgname);
return 0;
}
if (vgstatus & LVM_WRITE_LOCKED) {
vgstatus |= LVM_WRITE;
vgstatus &= ~LVM_WRITE_LOCKED;
}
vgsummary->vgstatus = vgstatus;
if (dm_config_get_str(vgn, "system_id", &str) &&
(!(vgsummary->system_id = dm_pool_strdup(mem, str))))
return_0;
goto fail_internal;
if (dm_config_get_str(vgn, "lock_type", &str) &&
(!(vgsummary->lock_type = dm_pool_strdup(mem, str))))
return_0;
goto fail_internal;
if (!_read_int32(vgn, "seqno", &vgsummary->seqno)) {
log_error("Couldn't read 'seqno' for volume group %s.",
vgsummary->vgname);
return 0;
}
return 1;
fail_internal:
*failed_flags |= FAILED_INTERNAL;
return 0;
}
static struct text_vg_version_ops _vsn1_ops = {


@@ -81,7 +81,9 @@ struct mda_header {
} __attribute__ ((packed));
struct mda_header *raw_read_mda_header(const struct format_type *fmt,
struct device_area *dev_area);
struct device_area *dev_area,
struct label_read_data *ld,
uint64_t *failed_flags);
struct mda_lists {
struct dm_list dirs;
@@ -103,8 +105,12 @@ struct mda_context {
#define LVM2_LABEL "LVM2 001"
#define MDA_SIZE_MIN (8 * (unsigned) lvm_getpagesize())
int vgname_from_mda(const struct format_type *fmt, struct mda_header *mdah,
struct device_area *dev_area, struct lvmcache_vgsummary *vgsummary,
uint64_t *mda_free_sectors);
int read_metadata_location_summary(const struct format_type *fmt,
struct mda_header *mdah,
struct label_read_data *ld,
struct device_area *dev_area,
struct lvmcache_vgsummary *vgsummary,
uint64_t *mda_free_sectors,
uint64_t *failed_flags);
#endif


@@ -308,141 +308,371 @@ static int _text_initialise_label(struct labeller *l __attribute__((unused)),
return 1;
}
struct _update_mda_baton {
struct _mda_baton {
struct lvmcache_info *info;
struct label *label;
struct label_read_data *ld;
unsigned int fail_count;
unsigned int ignore_count;
unsigned int success_count;
};
static int _update_mda(struct metadata_area *mda, void *baton)
/*
* The return value from this function is not the result;
* the success/failure of reading metadata is tracked by
* baton fields.
*
* If this function returns 0, no further mdas are read.
* If this function returns 1, other mdas are read.
*/
static int _read_mda_header_and_metadata_summary(struct metadata_area *mda, void *baton)
{
struct _update_mda_baton *p = baton;
struct _mda_baton *p = baton;
const struct format_type *fmt = p->label->labeller->fmt;
struct mda_context *mdac = (struct mda_context *) mda->metadata_locn;
struct mda_header *mdah;
struct raw_locn *rl;
int rl_count = 0;
uint64_t failed_flags = 0;
struct lvmcache_vgsummary vgsummary = { 0 };
/*
* Using the labeller struct to preserve info about
* the last parsed vgname, vgid, creation host
*
* TODO: make lvmcache smarter and move this cache logic there
*/
if (!dev_open_readonly(mdac->area.dev)) {
mda_set_ignored(mda, 1);
stack;
log_error("Can't open device %s to read metadata from mda.", dev_name(mdac->area.dev));
mda->read_failed_flags |= FAILED_INTERNAL;
p->fail_count++;
return 1;
}
if (!(mdah = raw_read_mda_header(fmt, &mdac->area))) {
stack;
goto close_dev;
/*
* read mda_header
*
* metadata_area/mda_context/device_area (mda/mdac/area)
* is the incore method of getting to the struct disk_locn
* that follows the struct pv_header and points to the
* struct mda_header.
*/
if (!(mdah = raw_read_mda_header(fmt, &mdac->area, p->ld, &failed_flags))) {
log_debug_metadata("MDA header on %s at %"PRIu64" is not valid.",
dev_name(mdac->area.dev), mdac->area.start);
mda->read_failed_flags |= failed_flags;
p->fail_count++;
goto out;
}
mda_set_ignored(mda, rlocn_is_ignored(mdah->raw_locns));
if (mda_is_ignored(mda)) {
log_debug_metadata("Ignoring mda on device %s at offset %"PRIu64,
dev_name(mdac->area.dev),
mdac->area.start);
if (!dev_close(mdac->area.dev))
stack;
return 1;
log_debug_metadata("MDA header on %s at %"PRIu64" has ignored metadata.",
dev_name(mdac->area.dev), mdac->area.start);
p->ignore_count++;
goto out;
} else {
rl = &mdah->raw_locns[0];
while (rl->offset) {
rl_count++;
rl++;
}
log_debug_metadata("MDA header on %s at %"PRIu64" has %d metadata locations.",
dev_name(mdac->area.dev), mdac->area.start, rl_count);
}
if (vgname_from_mda(fmt, mdah, &mdac->area, &vgsummary,
&mdac->free_sectors) &&
!lvmcache_update_vgname_and_id(p->info, &vgsummary)) {
if (!dev_close(mdac->area.dev))
stack;
return_0;
if (!rl_count)
goto out;
/*
* read vg metadata, but save only a few fields from it in vgsummary
* (enough to create vginfo/info connections in lvmcache)
*
* FIXME: this should be unified with the very similar function
* that reads metadata for vg_read(): read_metadata_location_vg().
*
* (This is not using the checksum optimization because a new/zeroed
* vgsummary struct is passed for each area, and read_metadata_location
* decides to use the checksum optimization based on whether or not
* the vgsummary.vgname is already set.)
*/
if (!read_metadata_location_summary(fmt, mdah, p->ld, &mdac->area, &vgsummary, &mdac->free_sectors, &failed_flags)) {
/* A more specific error has been logged prior to returning. */
log_debug_metadata("Metadata location on %s returned no VG summary.", dev_name(mdac->area.dev));
mda->read_failed_flags |= failed_flags;
p->fail_count++;
goto out;
}
close_dev:
/*
* Each copy of the metadata for a VG will call this to save the
* VG summary fields in lvmcache. When the checksum optimization
* is used, this is a waste of time and could be skipped, because
* we'd just be passing the same data again. When the metadata
* checksum doesn't match between copies, we should set a flag
* in the vginfo. Or, if we are not using the checksum optimization,
* then we might be passing a metadata summary that doesn't match
* the previous metadata summary (we should include the seqno in
* the summary to make that clearer.) If the summaries from
* devices don't match, we should also set a flag in the vginfo,
* as suggested above for mismatching checksums. Later, the vg_read
* code could look for the vginfo flag indicating mismatching
* checksums or summary fields (including seqno), and if all the
* copies matched, it could simply reuse the label_read_data
* from the original scan, and avoid label_scan_devs to rescan.
* This optimization would only apply to reporting commands,
* but could reduce scanning to one read per device in the
* common case.
*/
if (!lvmcache_update_vgname_and_id(p->info, &vgsummary)) {
log_debug_metadata("Metadata summary on %s cannot be saved in lvmcache.", dev_name(mdac->area.dev));
mda->read_failed_flags |= FAILED_INTERNAL;
p->fail_count++;
goto out;
}
log_debug_metadata("Metadata summary on %s found for VG %s.", dev_name(mdac->area.dev), vgsummary.vgname);
p->success_count++;
out:
if (!dev_close(mdac->area.dev))
stack;
return 1;
}
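The comment at the top of this function notes that its return value only controls whether the remaining mdas are visited, while the real outcome is accumulated in the baton counters. Below is a small self-contained sketch of that callback-plus-baton pattern, with hypothetical names rather than the lvmcache API.

#include <stddef.h>

struct scan_baton {                 /* outcome lives here, not in return codes */
	unsigned fail_count;
	unsigned ignore_count;
	unsigned success_count;
};

/* Returning 0 from the callback aborts the remaining areas. */
typedef int (*area_fn)(int area_id, void *baton);

static void foreach_area(const int *areas, size_t count, area_fn fn, void *baton)
{
	size_t i;

	for (i = 0; i < count; i++)
		if (!fn(areas[i], baton))
			return;
}

/* Example callback: record the result but return 1 so the other
 * copies of the metadata are still examined. */
static int check_area(int area_id, void *baton)
{
	struct scan_baton *b = baton;

	if (area_id < 0)
		b->fail_count++;
	else
		b->success_count++;
	return 1;
}

In the function above, the same idea lets one bad mda be tolerated as long as another copy on the device is good, which is what the success/fail accounting at the end of _text_read checks.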
static int _text_read(struct labeller *l, struct device *dev, void *buf,
struct label **label)
/*
* When label_read_data *ld is set, it means that we have read the first
* ld->buf_len bytes of the device and already have that data, so we don't need
* to do any dev_read's (as long as the desired dev_read offset+size is less
* then ld->buf_len).
*/
static int _text_read(struct labeller *l, struct device *dev, void *label_buf,
struct label_read_data *ld, struct label **label,
uint64_t *failed_flags)
{
struct label_header *lh = (struct label_header *) buf;
char pvid_s[ID_LEN + 1] __attribute__((aligned(8)));
char uuid[64] __attribute__((aligned(8)));
struct id pv_id_check;
struct label_header *lh = (struct label_header *) label_buf;
struct pv_header *pvhdr;
struct pv_header_extension *pvhdr_ext;
struct lvmcache_info *info;
struct disk_locn *dlocn_xl;
uint64_t offset;
uint64_t device_size;
uint32_t ext_version;
struct _update_mda_baton baton;
uint32_t ext_flags;
unsigned int data_area_count = 0;
unsigned int meta_area_count = 0;
int add_errors = 0;
struct _mda_baton baton = { 0 };
/*
* PV header base
* pv_header has uuid and device_size
*
* pv_header.disk_areas are two variable sequences of disk_locn's:
* . first null terminated sequence of disk_locn's are data areas
* . second null terminated sequence of disk_locn's are meta areas
*
* pv_header_extension has version and flags
*
* pv_header_extension.bootloader_areas is one set of disk_locn's:
* . null terminated sequence of disk_locn's are bootloader areas
*
* Step 1: look through structs to summarize for log message.
*/
pvhdr = (struct pv_header *) ((char *) buf + xlate32(lh->offset_xl));
pvhdr = (struct pv_header *) ((char *) label_buf + xlate32(lh->offset_xl));
if (!(info = lvmcache_add(l, (char *)pvhdr->pv_uuid, dev,
FMT_TEXT_ORPHAN_VG_NAME,
FMT_TEXT_ORPHAN_VG_NAME, 0)))
strncpy(pvid_s, (char *)pvhdr->pv_uuid, sizeof(pvid_s) - 1);
pvid_s[sizeof(pvid_s) - 1] = '\0';
if (!id_read_format_try(&pv_id_check, pvid_s)) {
log_debug_metadata("PV header on %s uuid cannot be read.", dev_name(dev));
*failed_flags |= FAILED_PV_HEADER;
return_0;
}
*label = lvmcache_get_label(info);
if (!id_write_format((const struct id *)&pvid_s, uuid, sizeof(uuid))) {
log_debug_metadata("PV header on %s uuid cannot be written.", dev_name(dev));
*failed_flags |= FAILED_INTERNAL;
return_0;
}
lvmcache_set_device_size(info, xlate64(pvhdr->device_size_xl));
/*
* FIXME: check for invalid values of other pv_header fields.
*/
lvmcache_del_das(info);
lvmcache_del_mdas(info);
lvmcache_del_bas(info);
device_size = xlate64(pvhdr->device_size_xl);
/* Data areas holding the PEs */
dlocn_xl = pvhdr->disk_areas_xl;
while ((offset = xlate64(dlocn_xl->offset))) {
lvmcache_add_da(info, offset, xlate64(dlocn_xl->size));
dlocn_xl++;
data_area_count++;
}
/* Metadata area headers */
dlocn_xl++;
while ((offset = xlate64(dlocn_xl->offset))) {
lvmcache_add_mda(info, dev, offset, xlate64(dlocn_xl->size), 0);
dlocn_xl++;
meta_area_count++;
}
/* PV header extension */
dlocn_xl++;
pvhdr_ext = (struct pv_header_extension *) ((char *) dlocn_xl);
ext_version = xlate32(pvhdr_ext->version);
ext_flags = xlate32(pvhdr_ext->flags);
log_debug_metadata("PV header on %s has device_size %llu uuid %s",
dev_name(dev), (unsigned long long)device_size, uuid);
log_debug_metadata("PV header on %s has data areas %d metadata areas %d",
dev_name(dev), data_area_count, meta_area_count);
log_debug_metadata("PV header on %s has extension version %u flags %x",
dev_name(dev), ext_version, ext_flags);
/*
* Step 2: look through structs to populate lvmcache
* with pv_header/extension info for this device.
*
* An "info" struct represents a device in lvmcache
* and is created by lvmcache_add(). The info struct
* in lvmcache is not associated with any vginfo
* struct until the VG name is known from the summary.
*
* lvmcache_add() calls _create_info() which creates
* the label struct, saved at info->label.
* lvmcache_get_label(info) then returns info->label.
*/
if (!(info = lvmcache_add(l, (char *)pvhdr->pv_uuid, dev, FMT_TEXT_ORPHAN_VG_NAME, FMT_TEXT_ORPHAN_VG_NAME, 0))) {
log_error("PV %s info cannot be saved in cache.", dev_name(dev));
*failed_flags |= FAILED_INTERNAL;
return 0;
}
/* get the label that lvmcache_add() created */
if (!(*label = lvmcache_get_label(info))) {
*failed_flags |= FAILED_INTERNAL;
lvmcache_del(info);
return_0;
}
lvmcache_set_device_size(info, device_size);
lvmcache_del_das(info);
lvmcache_del_mdas(info);
lvmcache_del_bas(info);
/*
* Following the struct pv_header on disk are two lists of
* struct disk_locn (each disk_locn is an offset+size pair).
* The first list points to areas of data blocks holding PEs.
* The list of disk_locn's is terminated by a disk_locn
* holding zeros.
*/
dlocn_xl = pvhdr->disk_areas_xl;
while ((offset = xlate64(dlocn_xl->offset))) {
if (!lvmcache_add_da(info, offset, xlate64(dlocn_xl->size)))
add_errors++;
dlocn_xl++;
}
dlocn_xl++;
/*
* PV header extension
* Following the disk_locn structs for data areas is a
* series of disk_locn structs each pointing to a
* struct mda_header. dlocn_xl->offset now points
* to the first mda_header on disk.
*/
pvhdr_ext = (struct pv_header_extension *) ((char *) dlocn_xl);
if (!(ext_version = xlate32(pvhdr_ext->version)))
goto out;
dlocn_xl++;
while ((offset = xlate64(dlocn_xl->offset))) {
/* this is just a roundabout call to add_mda() */
if (!lvmcache_add_mda(info, dev, offset, xlate64(dlocn_xl->size), 0))
add_errors++;
dlocn_xl++;
}
log_debug_metadata("%s: PV header extension version %" PRIu32 " found",
dev_name(dev), ext_version);
/* PV header extension */
dlocn_xl++;
pvhdr_ext = (struct pv_header_extension *) ((char *) dlocn_xl);
/* version 0 doesn't support extension */
if (!ext_version)
goto mda_read;
/* Extension version */
lvmcache_set_ext_version(info, xlate32(pvhdr_ext->version));
lvmcache_set_ext_version(info, ext_version);
/* Extension flags */
lvmcache_set_ext_flags(info, xlate32(pvhdr_ext->flags));
lvmcache_set_ext_flags(info, ext_flags);
/* Bootloader areas */
dlocn_xl = pvhdr_ext->bootloader_areas_xl;
while ((offset = xlate64(dlocn_xl->offset))) {
lvmcache_add_ba(info, offset, xlate64(dlocn_xl->size));
if (!lvmcache_add_ba(info, offset, xlate64(dlocn_xl->size)))
add_errors++;
dlocn_xl++;
}
out:
if (add_errors) {
log_error("PV %s disk area info cannot be saved in cache.", dev_name(dev));
*failed_flags |= FAILED_INTERNAL;
lvmcache_del(info);
return 0;
}
mda_read:
/*
* Step 3: read struct mda_header and vg metadata which is
* saved into lvmcache and used to create a vginfo struct
* and associate the vginfo with the info structs created
* above. The locations on disk of the mda_header structs
* comes from the disk_locn structs above. Those disk_locn
* structs are confusingly not used directly, but were saved
* into lvmcache as metadata_area+mda_context. This means
* that instead of looking at disk_locn/offset+size, we look
* through a series of incore structs to get offset+size:
* metadata_area/mda_context/device_area/start+size.
* FIXME: get rid of these excessive abstractions.
*/
baton.info = info;
baton.label = *label;
baton.ld = ld;
if (!lvmcache_foreach_mda(info, _update_mda, &baton))
return_0;
/*
* for each mda on info->mdas. These metadata_area structs were
* created and added to info->mdas above by lvmcache_add_mda().
*/
lvmcache_foreach_mda(info, _read_mda_header_and_metadata_summary, &baton);
lvmcache_make_valid(info);
/*
* Presumably the purpose of having multiple mdas on a device is
* to have a backup that can be used in case one is bad.
*/
if (baton.fail_count && baton.success_count) {
log_warn("WARNING: Using device %s with mix of %d good and %d bad mdas.",
dev_name(dev), baton.success_count, baton.fail_count);
}
return 1;
if (baton.success_count) {
/* if any mda was successful, we ignore other failed mdas */
log_debug_metadata("PV on %s has valid mda header and vg metadata.", dev_name(dev));
lvmcache_make_valid(info);
return 1;
} else if (baton.fail_count) {
/* FIXME: get failed_flags from mda->read_failed_flags */
log_debug_metadata("PV on %s has valid mda header and invalid vg metadata.", dev_name(dev));
*failed_flags |= FAILED_VG_METADATA;
lvmcache_del(info);
return 0;
} else if (baton.ignore_count) {
log_debug_metadata("PV on %s has valid mda header and ignored vg metadata.", dev_name(dev));
lvmcache_make_valid(info);
return 1;
} else {
/* no VG metadata */
log_debug_metadata("PV on %s has valid mda header and unused vg metadata.", dev_name(dev));
lvmcache_make_valid(info);
return 1;
}
}
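The step comments in _text_read describe the on-disk layout after the pv_header: consecutive lists of disk_locn entries (data areas, then metadata areas, then, in the header extension, bootloader areas), each list ended by a zeroed entry. Here is a minimal sketch of walking such zero-terminated lists; the struct below only mirrors the offset/size idea and is not the on-disk LVM2 definition, which goes through the little-endian xlate helpers.

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for a disk_locn: offset/size pairs, zero-terminated. */
struct locn {
	uint64_t offset;
	uint64_t size;
};

/*
 * Print one zero-terminated list and return a pointer just past its
 * terminating entry, which is where the next list starts.
 */
static const struct locn *walk_locns(const struct locn *l, const char *what)
{
	for (; l->offset; l++)
		printf("%s area: offset %llu size %llu\n", what,
		       (unsigned long long)l->offset,
		       (unsigned long long)l->size);
	return l + 1;        /* skip the zeroed terminator */
}

static void walk_pv_areas(const struct locn *first)
{
	const struct locn *next;

	next = walk_locns(first, "data");      /* PE data areas */
	(void) walk_locns(next, "metadata");   /* mda_header locations */
}

This is why the code above advances dlocn_xl one extra time between the two loops: it has to step over the terminating entry of the first list before reading the second.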
static void _text_destroy_label(struct labeller *l __attribute__((unused)),

File diff suppressed because it is too large.


@@ -18,6 +18,7 @@
#include "uuid.h"
#include "device.h"
#include "toolcontext.h"
#define LABEL_ID "LABELONE"
#define LABEL_SIZE SECTOR_SIZE /* Think very carefully before changing this */
@@ -28,6 +29,17 @@ struct labeller;
void allow_reads_with_lvmetad(void);
struct label_read_data {
struct dev_async_io *aio;
char *buf; /* points to aio->buf */
struct device *dev;
struct dm_list list;
int buf_len; /* same as aio->buf_len */
int result; /* same as aio->result */
int try_sync;
int process_done;
};
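As described above _text_read, a label_read_data holds the first buf_len bytes of a device from the scan, so a later read whose range fits inside that window can be served straight from memory, and only a request past the end of the buffer needs to touch the device again. Below is a hedged sketch of that decision; the callback stands in for the synchronous device-read path and is not an LVM2 function.

#include <stdint.h>
#include <string.h>

typedef int (*dev_read_fn)(uint64_t offset, size_t len, void *out);

/*
 * Serve the request from the prefetched scan buffer when it fits,
 * otherwise fall back to reading the device.
 */
static int read_via_scan_buf(const char *scan_buf, uint64_t scan_len,
			     uint64_t offset, size_t len, void *out,
			     dev_read_fn fallback)
{
	if (scan_buf && offset + len <= scan_len) {
		memcpy(out, scan_buf + offset, len);
		return 1;
	}
	return fallback ? fallback(offset, len, out) : 0;
}

When the request does not fit, that matches the "scan buffer too small" fallback to a new synchronous read noted earlier in this comparison.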
/* On disk - 32 bytes */
struct label_header {
int8_t id[8]; /* LABELONE */
@@ -63,7 +75,9 @@ struct label_ops {
* Read a label from a volume.
*/
int (*read) (struct labeller * l, struct device * dev,
void *buf, struct label ** label);
void *label_buf,
struct label_read_data *ld, struct label ** label,
uint64_t *failed_flags);
/*
* Additional consistency checks for the paranoid.
@@ -99,11 +113,15 @@ int label_register_handler(struct labeller *handler);
struct labeller *label_get_handler(const char *name);
int label_remove(struct device *dev);
int label_read(struct device *dev, struct label **result,
uint64_t scan_sector);
int label_read(struct device *dev, struct label **label, uint64_t scan_sector);
int label_write(struct device *dev, struct label *label);
int label_verify(struct device *dev);
struct label *label_create(struct labeller *labeller);
void label_destroy(struct label *label);
int label_scan_force(struct cmd_context *cmd);
int label_scan(struct cmd_context *cmd);
int label_scan_devs(struct cmd_context *cmd, struct dm_list *devs);
struct label_read_data *get_label_read_data(struct cmd_context *cmd, struct device *dev);
#endif


@@ -27,6 +27,48 @@
#include "lv.h"
#include "lvm-percent.h"
#define FAILED_INTERNAL 0x0000000000000001
#define FAILED_LABEL_CHECKSUM 0x0000000000000002
#define FAILED_LABEL_SECTOR_NUMBER 0x0000000000000004
#define FAILED_PV_HEADER 0x0000000000000008
#define FAILED_MDA_HEADER 0x0000000000000010
#define FAILED_MDA_HEADER_IO 0x0000000000000020
#define FAILED_MDA_HEADER_CHECKSUM 0x0000000000000040
#define FAILED_MDA_HEADER_FIELD 0x0000000000000080
#define FAILED_MDA_HEADER_RLOCN 0x0000000000000100
#define FAILED_VG_METADATA 0x0000000000000200
#define FAILED_VG_METADATA_IO 0x0000000000000400
#define FAILED_VG_METADATA_CHECKSUM 0x0000000000000800
#define FAILED_VG_METADATA_PARSE 0x0000000000001000
#define FAILED_VG_METADATA_FIELD 0x0000000000002000
#define FAILED_VG_METADATA_SIZE 0x0000000000004000
#define FAILED_NOT_FOUND 0x0000000000008000
#define FAILED_BADNAME 0x0000000000010000
#define FAILED_VG_LOCKING 0x0000000000020000
#define FAILED_ACCESS 0x0000000000040000
#define FAILED_MISSING_PVS 0x0000000000080000
#define FAILED_MISSING_DEVS 0x0000000000100000
#define FAILED_PV_DEV_SIZES 0x0000000000200000
#define FAILED_BAD_PV_SEGS 0x0000000000400000
#define FAILED_UNKNOWN_LV_SEGS 0x0000000000800000
#define FAILED_BAD_LV_SEGS 0x0000000001000000
#define FAILED_BAD_MDAS 0x0000000002000000
#define FAILED_OLD_PVS 0x0000000004000000
#define FAILED_REAPPEARED_PVS 0x0000000008000000
#define FAILED_PARTIAL_LVS 0x0000000010000000
#define FAILED_OUTDATED_HISTORICAL_LVS 0x0000000020000000
#define FAILED_WRONG_PV_EXT 0x0000000040000000
#define FAILED_CLUSTERED 0x0000000080000000
#define FAILED_EXPORTED 0x0000000100000000
#define FAILED_READ_ONLY 0x0000000200000000
#define FAILED_RESIZEABLE 0x0000000400000000
#define FAILED_LOCK_TYPE 0x0000000800000000
#define FAILED_LOCK_MODE 0x0000001000000000
#define FAILED_SYSTEMID 0x0000002000000000
#define FAILED_REPAIR_UPDATE 0x0000004000000000
#define FAILED_ERROR 0x8000000000000000
#define MAX_STRIPES 128U
#define SECTOR_SHIFT 9L
#define SECTOR_SIZE ( 1L << SECTOR_SHIFT )
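The FAILED_* values above are individual bits in a 64-bit mask, so a read path can accumulate several causes as it goes and the caller can test, clear, or report them independently afterwards. A small self-contained illustration of that accumulate-and-test pattern follows; the local EX_ defines stand in for two of the flags above and the reporting function is hypothetical.

#include <stdint.h>
#include <stdio.h>

#define EX_FAILED_INTERNAL    0x0000000000000001ULL  /* local stand-ins */
#define EX_FAILED_VG_METADATA 0x0000000000000200ULL

static void report_failed_flags(uint64_t failed_flags)
{
	/* Test and clear each known bit; anything left over is unhandled. */
	if (failed_flags & EX_FAILED_INTERNAL) {
		failed_flags &= ~EX_FAILED_INTERNAL;
		fprintf(stderr, "internal error\n");
	}
	if (failed_flags & EX_FAILED_VG_METADATA) {
		failed_flags &= ~EX_FAILED_VG_METADATA;
		fprintf(stderr, "bad VG metadata\n");
	}
	if (failed_flags)
		fprintf(stderr, "unhandled failed flags 0x%llx\n",
			(unsigned long long)failed_flags);
}

It mirrors the test-and-clear shape used by _print_failed_flags and _suppress_failed_flags later in this comparison.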
@@ -175,27 +217,12 @@
#define MIRROR_SKIP_INIT_SYNC 0x00000010U /* skip initial sync */
/* vg_read and vg_read_for_update flags */
#define READ_ALLOW_INCONSISTENT 0x00010000U
#define READ_NO_REPAIR 0x00010000U
#define READ_ALLOW_EXPORTED 0x00020000U
#define READ_OK_NOTFOUND 0x00040000U
#define READ_WARN_INCONSISTENT 0x00080000U
#define READ_FOR_UPDATE 0x00100000U /* A meta-flag, useful with toollib for_each_* functions. */
/* vg's "read_status" field */
#define FAILED_INCONSISTENT 0x00000001U
#define FAILED_LOCKING 0x00000002U
#define FAILED_NOTFOUND 0x00000004U
#define FAILED_READ_ONLY 0x00000008U
#define FAILED_EXPORTED 0x00000010U
#define FAILED_RESIZEABLE 0x00000020U
#define FAILED_CLUSTERED 0x00000040U
#define FAILED_ALLOCATION 0x00000080U
#define FAILED_EXIST 0x00000100U
#define FAILED_RECOVERY 0x00000200U
#define FAILED_SYSTEMID 0x00000400U
#define FAILED_LOCK_TYPE 0x00000800U
#define FAILED_LOCK_MODE 0x00001000U
#define SUCCESS 0x00000000U
#define READ_FOR_UPDATE 0x00080000U /* A meta-flag, useful with toollib for_each_* functions. */
#define READ_ALLOW_ERRORS 0x00100000U
#define READ_NO_LOCK 0x00200000U /* vg_read should not do any vg locking */
#define VGMETADATACOPIES_ALL UINT32_MAX
#define VGMETADATACOPIES_UNMANAGED 0
@@ -628,8 +655,11 @@ void pvcreate_params_set_defaults(struct pvcreate_params *pp);
int vg_write(struct volume_group *vg);
int vg_commit(struct volume_group *vg);
void vg_revert(struct volume_group *vg);
struct volume_group *vg_read_internal(struct cmd_context *cmd, const char *vg_name,
const char *vgid, uint32_t warn_flags, int *consistent);
struct volume_group *vg_read_internal(struct cmd_context *cmd, const char *vg_name, const char *vgid,
int read_precommit_mdas,
uint64_t *failed_flags,
struct dm_list *bad_mdas,
struct dm_list *old_pvs);
#define get_pvs( cmd ) get_pvs_internal((cmd), NULL, NULL)
#define get_pvs_perserve_vg( cmd, pv_list, vg_list ) get_pvs_internal((cmd), (pv_list), (vg_list))
@@ -664,7 +694,7 @@ int vg_missing_pv_count(const struct volume_group *vg);
int vgs_are_compatible(struct cmd_context *cmd,
struct volume_group *vg_from,
struct volume_group *vg_to);
uint32_t vg_lock_newname(struct cmd_context *cmd, const char *vgname);
int vg_lock_newname(struct cmd_context *cmd, const char *vgname, int *lock_failed, int *name_exists);
int lv_resize(struct logical_volume *lv,
struct lvresize_params *lp,
@@ -673,15 +703,8 @@ int lv_resize(struct logical_volume *lv,
/*
* Return a handle to VG metadata.
*/
struct volume_group *vg_read(struct cmd_context *cmd, const char *vg_name,
const char *vgid, uint32_t read_flags, uint32_t lockd_state);
struct volume_group *vg_read_for_update(struct cmd_context *cmd, const char *vg_name,
const char *vgid, uint32_t read_flags, uint32_t lockd_state);
/*
* Test validity of a VG handle.
*/
uint32_t vg_read_error(struct volume_group *vg_handle);
struct volume_group *vg_read(struct cmd_context *cmd, const char *vg_name, const char *vgid,
uint32_t read_flags, uint32_t lockd_state, uint64_t *failed_flags);
/* pe_start and pe_end relate to any existing data so that new metadata
* areas can avoid overlap */
@@ -709,7 +732,8 @@ uint32_t pv_list_extents_free(const struct dm_list *pvh);
int validate_new_vg_name(struct cmd_context *cmd, const char *vg_name);
int vg_validate(struct volume_group *vg);
struct volume_group *vg_create(struct cmd_context *cmd, const char *vg_name);
struct volume_group *vg_lock_and_create(struct cmd_context *cmd, const char *vg_name);
struct volume_group *vg_lock_and_create(struct cmd_context *cmd, const char *vg_name,
int *lock_failed, int *name_exists);
int vg_remove_mdas(struct volume_group *vg);
int vg_remove_check(struct volume_group *vg);
void vg_remove_pvs(struct volume_group *vg);
@@ -1289,4 +1313,6 @@ int is_system_id_allowed(struct cmd_context *cmd, const char *system_id);
int vg_strip_outdated_historical_lvs(struct volume_group *vg);
const char *failed_flags_str(uint64_t failed_flags);
#endif


@@ -314,7 +314,7 @@ struct physical_volume *pvcreate_vol(struct cmd_context *cmd, const char *pv_nam
}
if (pp->pva.idp) {
if ((dev = lvmcache_device_from_pvid(cmd, pp->pva.idp, NULL, NULL)) &&
if ((dev = lvmcache_device_from_pvid(cmd, pp->pva.idp, NULL)) &&
(dev != dev_cache_get(pv_name, cmd->full_filter))) {
if (!id_write_format((const struct id*)&pp->pva.idp->uuid,
buffer, sizeof(buffer)))

File diff suppressed because it is too large.


@@ -25,6 +25,8 @@
#include "dev-cache.h"
#include "lvm-string.h"
#include "metadata-exported.h"
#include "lvm-logging.h"
#include "label.h"
//#define MAX_STRIPES 128U
//#define SECTOR_SHIFT 9L
@@ -79,14 +81,18 @@ struct metadata_area_ops {
struct volume_group *(*vg_read) (struct format_instance * fi,
const char *vg_name,
struct metadata_area * mda,
struct cached_vg_fmtdata **vg_fmtdata,
unsigned *use_previous_vg,
int single_device);
struct label_read_data *ld,
uint32_t last_meta_checksum,
size_t last_meta_size,
unsigned *last_meta_matches);
struct volume_group *(*vg_read_precommit) (struct format_instance * fi,
const char *vg_name,
struct metadata_area * mda,
struct cached_vg_fmtdata **vg_fmtdata,
unsigned *use_previous_vg);
struct label_read_data *ld,
uint32_t last_meta_checksum,
size_t last_meta_size,
unsigned *last_meta_matches);
/*
* Write out complete VG metadata. You must ensure internal
* consistency before calling. eg. PEs can't refer to PVs not
@@ -166,7 +172,15 @@ struct metadata_area {
struct dm_list list;
struct metadata_area_ops *ops;
void *metadata_locn;
uint64_t header_start; /* mda_header.start */
uint64_t read_failed_flags;
uint32_t status;
uint32_t vg_read_seqno;
uint32_t vg_read_meta_checksum;
size_t vg_read_meta_size;
unsigned vg_read_skipped:1;
unsigned vg_read_success:1;
unsigned vg_read_failed:1;
};
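The new per-mda fields above record what the last read of each metadata area saw: whether it failed or was skipped, plus the seqno, checksum and size. Below is a hypothetical sketch, not code from this series, of how a caller could use such per-copy records to check that every readable copy agrees before reusing a cached buffer.

#include <stdint.h>
#include <stddef.h>

struct mda_read_state {          /* simplified stand-in for the new fields */
	uint32_t seqno;
	uint32_t checksum;
	size_t size;
	unsigned failed:1;
	unsigned skipped:1;
};

/* Return 1 when every copy that was actually read reports the same
 * seqno, checksum and size as the first readable copy. */
static int copies_agree(const struct mda_read_state *m, size_t count)
{
	const struct mda_read_state *first = NULL;
	size_t i;

	for (i = 0; i < count; i++) {
		if (m[i].failed || m[i].skipped)
			continue;
		if (!first) {
			first = &m[i];
			continue;
		}
		if (m[i].seqno != first->seqno ||
		    m[i].checksum != first->checksum ||
		    m[i].size != first->size)
			return 0;
	}
	return 1;
}

Nothing in this diff does exactly this; the long comment in _read_mda_header_and_metadata_summary only suggests that matching checksums and seqnos could let a later vg_read reuse the earlier scan.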
struct metadata_area *mda_copy(struct dm_pool *mem,
struct metadata_area *mda);


@@ -97,11 +97,6 @@ void release_vg(struct volume_group *vg)
if (!vg || (vg->fid && vg == vg->fid->fmt->orphan_vg))
return;
/* Check if there are any vginfo holders */
if (vg->vginfo &&
!lvmcache_vginfo_holders_dec_and_test_for_zero(vg->vginfo))
return;
release_vg(vg->vg_committed);
release_vg(vg->vg_precommitted);
if (vg->cft_precommitted)
@@ -784,10 +779,9 @@ int vgreduce_single(struct cmd_context *cmd, struct volume_group *vg,
vg->free_count -= pv_pe_count(pv) - pv_pe_alloc_count(pv);
vg->extent_count -= pv_pe_count(pv);
orphan_vg = vg_read_for_update(cmd, vg->fid->fmt->orphan_vg_name,
NULL, 0, 0);
if (vg_read_error(orphan_vg))
orphan_vg = vg_read(cmd, vg->fid->fmt->orphan_vg_name, NULL,
READ_NO_LOCK | READ_FOR_UPDATE, 0, NULL);
if (!orphan_vg)
goto bad;
if (!vg_split_mdas(cmd, vg, orphan_vg) || !vg->pv_count) {


@@ -152,11 +152,6 @@ struct volume_group {
struct dm_list removed_pvs;
uint32_t open_mode; /* FIXME: read or write - check lock type? */
/*
* Store result of the last vg_read().
* 0 for success else appropriate FAILURE_* bits set.
*/
uint32_t read_status;
uint32_t mda_copies; /* target number of mdas for this VG */
struct dm_hash_table *hostnames; /* map of creation hostnames */


@@ -54,8 +54,6 @@ static int _activation_checks = 0;
static char _sysfs_dir_path[PATH_MAX] = "";
static int _dev_disable_after_error_count = DEFAULT_DISABLE_AFTER_ERROR_COUNT;
static uint64_t _pv_min_size = (DEFAULT_PV_MIN_SIZE_KB * 1024L >> SECTOR_SHIFT);
static int _detect_internal_vg_cache_corruption =
DEFAULT_DETECT_INTERNAL_VG_CACHE_CORRUPTION;
static const char *_unknown_device_name = DEFAULT_UNKNOWN_DEVICE_NAME;
void init_verbose(int level)
@@ -198,11 +196,6 @@ void init_pv_min_size(uint64_t sectors)
_pv_min_size = sectors;
}
void init_detect_internal_vg_cache_corruption(int detect)
{
_detect_internal_vg_cache_corruption = detect;
}
void set_cmd_name(const char *cmd)
{
strncpy(_cmd_name, cmd, sizeof(_cmd_name) - 1);
@@ -387,11 +380,6 @@ uint64_t pv_min_size(void)
return _pv_min_size;
}
int detect_internal_vg_cache_corruption(void)
{
return _detect_internal_vg_cache_corruption;
}
const char *unknown_device_name(void)
{
return _unknown_device_name;


@@ -51,7 +51,6 @@ void init_udev_checking(int checking);
void init_dev_disable_after_error_count(int value);
void init_pv_min_size(uint64_t sectors);
void init_activation_checks(int checks);
void init_detect_internal_vg_cache_corruption(int detect);
void init_retry_deactivation(int retry);
void init_unknown_device_name(const char *name);
@@ -85,7 +84,6 @@ int udev_checking(void);
const char *sysfs_dir_path(void);
uint64_t pv_min_size(void);
int activation_checks(void);
int detect_internal_vg_cache_corruption(void);
int retry_deactivation(void);
const char *unknown_device_name(void);


@@ -45,6 +45,10 @@ include $(top_builddir)/make.tmpl
LDFLAGS += -L$(top_builddir)/lib -L$(top_builddir)/daemons/dmeventd
LIBS += $(LVMINTERNAL_LIBS) -ldevmapper
ifeq ("@AIO@", "yes")
LIBS += $(AIO_LIBS)
endif
.PHONY: install_dynamic install_static install_include install_pkgconfig
INSTALL_TYPE = install_dynamic


@@ -64,6 +64,7 @@ LDDEPS += @LDDEPS@
LIB_SUFFIX = @LIB_SUFFIX@
LVMINTERNAL_LIBS = -llvm-internal $(DMEVENT_LIBS) $(DAEMON_LIBS) $(SYSTEMD_LIBS) $(UDEV_LIBS) $(DL_LIBS) $(BLKID_LIBS)
DL_LIBS = @DL_LIBS@
AIO_LIBS = @AIO_LIBS@
RT_LIBS = @RT_LIBS@
M_LIBS = @M_LIBS@
PTHREAD_LIBS = @PTHREAD_LIBS@


@@ -31,6 +31,10 @@ endif
LVMLIBS = @LVM2APP_LIB@ -ldevmapper
endif
ifeq ("@AIO@", "yes")
LVMLIBS += $(AIO_LIBS)
endif
LVM_SCRIPTS = lvmdump.sh lvmconf.sh
DM_SCRIPTS =


@@ -109,6 +109,10 @@ ifeq ("@CMDLIB@", "yes")
INSTALL_LVM_TARGETS += $(INSTALL_CMDLIB_TARGETS)
endif
ifeq ("@AIO@", "yes")
LVMLIBS += $(AIO_LIBS)
endif
EXPORTED_HEADER = $(srcdir)/lvm2cmd.h
EXPORTED_FN_PREFIX = lvm2


@@ -140,6 +140,7 @@ static inline int configtype_arg(struct cmd_context *cmd __attribute__((unused))
#define ENABLE_DUPLICATE_DEVS 0x00000400
#define DISALLOW_TAG_ARGS 0x00000800
#define GET_VGNAME_FROM_OPTIONS 0x00001000
#define ENABLE_DEFECTIVE_DEVS 0x00002000
/* create foo_CMD enums for command def ID's in command-lines.in */


@@ -43,7 +43,7 @@ xx(lastlog,
xx(lvchange,
"Change the attributes of logical volume(s)",
CACHE_VGMETADATA | PERMITTED_READ_ONLY)
PERMITTED_READ_ONLY)
xx(lvconvert,
"Change logical volume layout",
@@ -127,7 +127,11 @@ xx(pvdata,
xx(pvdisplay,
"Display various attributes of physical volume(s)",
CACHE_VGMETADATA | PERMITTED_READ_ONLY | ENABLE_ALL_DEVS | ENABLE_DUPLICATE_DEVS | LOCKD_VG_SH)
PERMITTED_READ_ONLY |
ENABLE_ALL_DEVS |
ENABLE_DUPLICATE_DEVS |
ENABLE_DEFECTIVE_DEVS |
LOCKD_VG_SH)
/* ALL_VGS_IS_DEFAULT is for polldaemon to find pvmoves in-progress using process_each_vg. */
@@ -145,7 +149,12 @@ xx(pvremove,
xx(pvs,
"Display information about physical volumes",
CACHE_VGMETADATA | PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT | ENABLE_ALL_DEVS | ENABLE_DUPLICATE_DEVS | LOCKD_VG_SH)
PERMITTED_READ_ONLY |
ALL_VGS_IS_DEFAULT |
ENABLE_ALL_DEVS |
ENABLE_DUPLICATE_DEVS |
ENABLE_DEFECTIVE_DEVS |
LOCKD_VG_SH)
xx(pvscan,
"List all physical volumes",
@@ -173,7 +182,7 @@ xx(vgcfgrestore,
xx(vgchange,
"Change volume group attributes",
CACHE_VGMETADATA | PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT)
PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT)
xx(vgck,
"Check the consistency of volume group(s)",


@@ -2280,7 +2280,6 @@ static int _get_current_settings(struct cmd_context *cmd)
cmd->current_settings.archive = arg_int_value(cmd, autobackup_ARG, cmd->current_settings.archive);
cmd->current_settings.backup = arg_int_value(cmd, autobackup_ARG, cmd->current_settings.backup);
cmd->current_settings.cache_vgmetadata = cmd->cname->flags & CACHE_VGMETADATA ? 1 : 0;
if (arg_is_set(cmd, readonly_ARG)) {
cmd->current_settings.activation = 0;
@@ -2796,7 +2795,7 @@ int lvm_run_command(struct cmd_context *cmd, int argc, char **argv)
cmd->position_argv = argv;
set_cmd_name(cmd->name);
if (arg_is_set(cmd, backgroundfork_ARG)) {
if (!become_daemon(cmd, 1)) {
/* parent - quit immediately */


@@ -157,12 +157,10 @@ int wait_for_single_lv(struct cmd_context *cmd, struct poll_operation_id *id,
}
/* Locks the (possibly renamed) VG again */
vg = vg_read(cmd, id->vg_name, NULL, READ_FOR_UPDATE, lockd_state);
if (vg_read_error(vg)) {
vg = vg_read(cmd, id->vg_name, NULL, READ_FOR_UPDATE, lockd_state, NULL);
if (!vg) {
/* What more could we do here? */
log_error("ABORTING: Can't reread VG for %s.", id->display_name);
release_vg(vg);
vg = NULL;
ret = 0;
goto out;
}
@@ -400,9 +398,8 @@ static int _report_progress(struct cmd_context *cmd, struct poll_operation_id *i
* to the VG we're interested in is the change done locally.
*/
vg = vg_read(cmd, id->vg_name, NULL, 0, lockd_state);
if (vg_read_error(vg)) {
release_vg(vg);
vg = vg_read(cmd, id->vg_name, NULL, 0, lockd_state, NULL);
if (!vg) {
log_error("Can't reread VG for %s", id->display_name);
ret = 0;
goto out_ret;


@@ -300,8 +300,10 @@ static int _pvscan_autoactivate(struct cmd_context *cmd, struct pvscan_aa_params
static int _pvscan_cache(struct cmd_context *cmd, int argc, char **argv)
{
struct pvscan_aa_params pp = { 0 };
struct dm_list single_devs;
struct dm_list found_vgnames;
struct device *dev;
struct device_list *devl;
const char *pv_name;
const char *reason = NULL;
int32_t major = -1;
@@ -315,6 +317,7 @@ static int _pvscan_cache(struct cmd_context *cmd, int argc, char **argv)
int add_errors = 0;
int ret = ECMD_PROCESSED;
dm_list_init(&single_devs);
dm_list_init(&found_vgnames);
dm_list_init(&pp.changed_vgnames);
@@ -434,8 +437,10 @@ static int _pvscan_cache(struct cmd_context *cmd, int argc, char **argv)
* to drop any devices that have left.)
*/
if (argc || devno_args)
if (argc || devno_args) {
log_verbose("Scanning devices on command line.");
cmd->pvscan_cache_single = 1;
}
while (argc--) {
pv_name = *argv++;
@@ -453,8 +458,11 @@ static int _pvscan_cache(struct cmd_context *cmd, int argc, char **argv)
} else {
/* Add device path to lvmetad. */
log_debug("Scanning dev %s for lvmetad cache.", pv_name);
if (!lvmetad_pvscan_single(cmd, dev, &found_vgnames, &pp.changed_vgnames))
add_errors++;
if (!(devl = dm_pool_zalloc(cmd->mem, sizeof(*devl))))
return_0;
devl->dev = dev;
dm_list_add(&single_devs, &devl->list);
}
} else {
if (sscanf(pv_name, "%d:%d", &major, &minor) != 2) {
@@ -471,8 +479,11 @@ static int _pvscan_cache(struct cmd_context *cmd, int argc, char **argv)
} else {
/* Add major:minor to lvmetad. */
log_debug("Scanning dev %d:%d for lvmetad cache.", major, minor);
if (!lvmetad_pvscan_single(cmd, dev, &found_vgnames, &pp.changed_vgnames))
add_errors++;
if (!(devl = dm_pool_zalloc(cmd->mem, sizeof(*devl))))
return_0;
devl->dev = dev;
dm_list_add(&single_devs, &devl->list);
}
}
@@ -482,6 +493,15 @@ static int _pvscan_cache(struct cmd_context *cmd, int argc, char **argv)
}
}
if (!dm_list_empty(&single_devs)) {
label_scan_devs(cmd, &single_devs);
dm_list_iterate_items(devl, &single_devs) {
if (!lvmetad_pvscan_single(cmd, devl->dev, &found_vgnames, &pp.changed_vgnames))
add_errors++;
}
}
if (!devno_args)
goto activate;


@@ -26,12 +26,6 @@
((ret_code) == ECMD_PROCESSED) ? REPORT_OBJECT_CMDLOG_SUCCESS \
: REPORT_OBJECT_CMDLOG_FAILURE, (ret_code))
struct device_id_list {
struct dm_list list;
struct device *dev;
char pvid[ID_LEN + 1];
};
const char *command_name(struct cmd_context *cmd)
{
return cmd->command->name;
@@ -176,42 +170,67 @@ const char *skip_dev_dir(struct cmd_context *cmd, const char *vg_name,
}
/*
* Three possible results:
* a) return 0, skip 0: take the VG, and cmd will end in success
* b) return 0, skip 1: skip the VG, and cmd will end in success
* c) return 1, skip *: skip the VG, and cmd will end in failure
* Checks the FAILED flags returned by vg_read() to decide
* if they should produce an error message and command exit error.
*
* Case b is the special case, and includes the following:
* . The VG is inconsistent, and the command allows for inconsistent VGs.
* . The VG is clustered, the host cannot access clustered VG's,
* and the command option has been used to ignore clustered vgs.
* (This does not have any role in deciding if the command
* can use the VG. Those override conditions allowing a command
* to use a VG in spite of failed flags are handled in vg_read(),
* and if those override conditions are met, vg_read() does not
* return FAILED_ERROR, and this function is not used.)
*
* Case c covers the other errors returned when reading the VG.
* If *skip is 1, it's OK for the caller to read the list of PVs in the VG.
* A number of FAILED flags have conditions in which they should
* not produce error messages. For each of those FAILED flags,
* check its associated condition, and clear the flag if the
* message can be skipped. At the end, if any failed flags remain
* set, then one or more failed flags cannot be suppressed,
* and we return 0 indicating that the error cannot be fully
* suppressed. The flags that cannot be suppressed are returned in
* failed_flags_result.
*
* If all the failed flags that are set have conditions that allow
* them to be suppressed, then return 1 indicating that the vg can
* be silently skipped. failed_flags_result will be 0.
*
* Many FAILED flags do not have conditions in which
* they can be suppressed. If any of these are set,
* then this function will always return 0.
*/
static int _ignore_vg(struct volume_group *vg, const char *vg_name,
struct dm_list *arg_vgnames, uint32_t read_flags,
int *skip, int *notfound)
static int _suppress_failed_flags(struct volume_group *vg, const char *vg_name,
struct dm_list *arg_vgnames, uint32_t read_flags,
uint64_t failed_flags, uint64_t *failed_flags_result)
{
uint32_t read_error = vg_read_error(vg);
uint64_t failed = failed_flags;
*skip = 0;
*notfound = 0;
if ((read_error & FAILED_NOTFOUND) && (read_flags & READ_OK_NOTFOUND)) {
*notfound = 1;
/*
* This is an odd case that shouldn't generally happen
* if the failed flags are used as intended.
* Without another specific failed flag we don't really
* know what this case is, and it's probably not right
* to silently ignore it.
*/
failed &= ~FAILED_ERROR;
if (!failed) {
*failed_flags_result = FAILED_ERROR;
return 0;
}
if ((read_error & FAILED_INCONSISTENT) && (read_flags & READ_ALLOW_INCONSISTENT))
read_error &= ~FAILED_INCONSISTENT; /* Check for other errors */
/*
* NOT_FOUND will not generally have any other flags set,
* so check if we're done early.
*/
if ((!vg || (failed & FAILED_NOT_FOUND)) && (read_flags & READ_OK_NOTFOUND))
failed &= ~FAILED_NOT_FOUND;
if ((read_error & FAILED_CLUSTERED) && vg->cmd->ignore_clustered_vgs) {
read_error &= ~FAILED_CLUSTERED; /* Check for other errors */
log_verbose("Skipping volume group %s", vg_name);
*skip = 1;
if (!failed) {
*failed_flags_result = 0;
return 1;
}
if ((failed & FAILED_CLUSTERED) && vg->cmd->ignore_clustered_vgs)
failed &= ~FAILED_CLUSTERED;
/*
* Commands that operate on "all vgs" shouldn't be bothered by
* skipping a foreign VG, and the command shouldn't fail when
@@ -219,17 +238,9 @@ static int _ignore_vg(struct volume_group *vg, const char *vg_name,
* operate on a foreign VG and it's skipped, then the command
* would expect to fail.
*/
if (read_error & FAILED_SYSTEMID) {
if (arg_vgnames && str_list_match_item(arg_vgnames, vg->name)) {
log_error("Cannot access VG %s with system ID %s with %slocal system ID%s%s.",
vg->name, vg->system_id, vg->cmd->system_id ? "" : "unknown ",
vg->cmd->system_id ? " " : "", vg->cmd->system_id ? vg->cmd->system_id : "");
return 1;
} else {
read_error &= ~FAILED_SYSTEMID; /* Check for other errors */
log_verbose("Skipping foreign volume group %s", vg_name);
*skip = 1;
}
if (failed & FAILED_SYSTEMID) {
if (!arg_vgnames || !str_list_match_item(arg_vgnames, vg->name))
failed &= ~FAILED_SYSTEMID;
}
/*
@@ -241,37 +252,151 @@ static int _ignore_vg(struct volume_group *vg, const char *vg_name,
* VG lock_type requires lvmlockd), and FAILED_LOCK_MODE (the
* command failed to acquire the necessary lock.)
*/
if (read_error & (FAILED_LOCK_TYPE | FAILED_LOCK_MODE)) {
if (arg_vgnames && str_list_match_item(arg_vgnames, vg->name)) {
if (read_error & FAILED_LOCK_TYPE)
log_error("Cannot access VG %s with lock type %s that requires lvmlockd.",
vg->name, vg->lock_type);
/* For FAILED_LOCK_MODE, the error is printed in vg_read. */
return 1;
} else {
read_error &= ~FAILED_LOCK_TYPE; /* Check for other errors */
read_error &= ~FAILED_LOCK_MODE;
log_verbose("Skipping volume group %s", vg_name);
*skip = 1;
if (failed & (FAILED_LOCK_TYPE | FAILED_LOCK_MODE)) {
if (!arg_vgnames || !str_list_match_item(arg_vgnames, vg->name)) {
failed &= ~FAILED_LOCK_TYPE;
failed &= ~FAILED_LOCK_MODE;
}
}
if (read_error == FAILED_CLUSTERED) {
*skip = 1;
stack; /* Error already logged */
return 1;
/*
* If failed flags remain set, then all the failures cannot be
* suppressed, and we return 0. If all failed flags have been
* cleared by their associated conditions, then we can silently
* suppress the error.
*/
if (failed) {
*failed_flags_result = failed;
return 0;
}
if (read_error != SUCCESS) {
*skip = 0;
if (is_orphan_vg(vg_name))
log_error("Cannot process standalone physical volumes");
else
log_error("Cannot process volume group %s", vg_name);
return 1;
*failed_flags_result = 0;
return 1;
}
static void _print_failed_flags(struct cmd_context *cmd, struct volume_group *vg,
const char *vg_name, uint64_t failed_flags_print)
{
uint64_t failed = failed_flags_print;
/*
* This shouldn't happen if failed_flags and suppress_failed_flags()
* are used as intended.
*/
if (!failed) {
log_error(INTERNAL_ERROR "Cannot use VG %s (no failed flags).", vg_name);
return;
}
return 0;
if (failed & FAILED_ERROR) {
failed &= ~FAILED_ERROR;
/*
* Usually this general flag is ignored and a more specific
* flag is set, but if this happens to be the only flag set
* for some reason then print a generic error.
*/
if (!failed)
log_error("Cannot read VG %s.", vg_name);
return;
}
if (failed & FAILED_INTERNAL) {
failed &= ~FAILED_INTERNAL;
log_error("Cannot read VG %s (internal error).", vg_name);
}
if (failed & FAILED_NOT_FOUND) {
failed &= ~FAILED_NOT_FOUND;
log_error("Volume group \"%s\" not found.", vg_name);
}
if (failed & FAILED_BADNAME) {
failed &= ~FAILED_BADNAME;
log_error("Volume group name \"%s\" has invalid characters.", vg_name);
}
if (failed & FAILED_VG_LOCKING) {
failed &= ~FAILED_VG_LOCKING;
log_error("Cannot lock VG %s.", vg_name);
}
if (failed & FAILED_READ_ONLY) {
failed &= ~FAILED_READ_ONLY;
log_error("Cannot access read-only VG %s.", vg_name);
}
if (failed & FAILED_EXPORTED) {
failed &= ~FAILED_EXPORTED;
log_error("Cannot access exported VG %s.", vg_name);
}
if (failed & FAILED_RESIZEABLE) {
failed &= ~FAILED_RESIZEABLE;
log_error("Cannot access non-resizeable VG %s.", vg_name);
}
if (failed & FAILED_CLUSTERED) {
failed &= ~FAILED_CLUSTERED;
log_error("Cannot access clustered VG %s that requires clvmd.", vg_name);
}
if (failed & FAILED_SYSTEMID) {
failed &= ~FAILED_SYSTEMID;
log_error("Cannot access VG %s with system ID %s with %slocal system ID%s%s.",
vg_name, vg->system_id, vg->cmd->system_id ? "" : "unknown ",
cmd->system_id ? " " : "", cmd->system_id ? cmd->system_id : "");
}
if (failed & FAILED_LOCK_TYPE) {
failed &= ~FAILED_LOCK_TYPE;
log_error("Cannot access VG %s with lock type %s that requires lvmlockd.",
vg_name, vg->lock_type);
}
if (failed & FAILED_LOCK_MODE) {
failed &= ~FAILED_LOCK_MODE;
/* FIXME: remove same message in access_vg_lock_type() ? */
log_error("Cannot access VG %s due to failed lock in lvmlockd.", vg_name);
}
if (failed & FAILED_MISSING_PVS) {
failed &= ~FAILED_MISSING_PVS;
log_error("Cannot change VG %s while PVs are missing.", vg_name);
log_error("Consider vgreduce --removemissing.");
}
if (failed & FAILED_MISSING_DEVS) {
failed &= ~FAILED_MISSING_DEVS;
log_error("Cannot change VG %s while PVs have no devices.", vg_name);
log_error("Consider vgreduce --removemissing.");
}
if (failed & FAILED_BAD_PV_SEGS) {
failed &= ~FAILED_BAD_PV_SEGS;
log_error("Bad PV segments in VG %s.", vg_name);
}
if (failed & FAILED_UNKNOWN_LV_SEGS) {
failed &= ~FAILED_UNKNOWN_LV_SEGS;
log_error("Unknown LV segments in VG %s.", vg_name);
}
if (failed & FAILED_BAD_LV_SEGS) {
failed &= ~FAILED_BAD_LV_SEGS;
log_error("Bad LV segments in VG %s.", vg_name);
}
/*
* TODO: add a check and message for each failed flag
*/
/*
* This generic message should never be reached, but keep it
* here in case a new failed flag is added without adding a
* check above.
*/
if (failed)
log_error("Cannot use VG %s due to failed flags 0x%llx.", vg_name, (unsigned long long)failed);
}
/*
@@ -1900,13 +2025,12 @@ static int _process_vgnameid_list(struct cmd_context *cmd, uint32_t read_flags,
const char *vg_name;
const char *vg_uuid;
uint32_t lockd_state = 0;
uint64_t failed_flags;
uint64_t failed_flags_print;
int whole_selected = 0;
int ret_max = ECMD_PROCESSED;
int ret;
int skip;
int notfound;
int process_all = 0;
int already_locked;
int do_report_ret_code = 1;
log_set_report_object_type(LOG_REPORT_OBJECT_TYPE_VG);
@@ -1923,8 +2047,6 @@ static int _process_vgnameid_list(struct cmd_context *cmd, uint32_t read_flags,
dm_list_iterate_items(vgnl, vgnameids_to_process) {
vg_name = vgnl->vg_name;
vg_uuid = vgnl->vgid;
skip = 0;
notfound = 0;
uuid[0] = '\0';
if (is_orphan_vg(vg_name)) {
@@ -1949,17 +2071,31 @@ static int _process_vgnameid_list(struct cmd_context *cmd, uint32_t read_flags,
continue;
}
already_locked = lvmcache_vgname_is_locked(vg_name);
failed_flags = 0;
failed_flags_print = 0;
vg = vg_read(cmd, vg_name, vg_uuid, read_flags, lockd_state);
if (_ignore_vg(vg, vg_name, arg_vgnames, read_flags, &skip, &notfound)) {
vg = vg_read(cmd, vg_name, vg_uuid, read_flags, lockd_state, &failed_flags);
if (!vg || (failed_flags & FAILED_ERROR)) {
if (!_suppress_failed_flags(vg, vg_name, arg_vgnames, read_flags, failed_flags, &failed_flags_print)) {
_print_failed_flags(cmd, vg, vg_name, failed_flags_print);
ret_max = ECMD_FAILED;
report_log_ret_code(ret_max);
}
stack;
ret_max = ECMD_FAILED;
report_log_ret_code(ret_max);
goto endvg;
}
if (skip || notfound)
goto endvg;
/*
* The VG can be used when failed_flags do not include ERROR.
* TODO: in what cases do we want to warn about failed_flags
* that are set?
*/
if (failed_flags) {
_suppress_failed_flags(vg, vg_name, arg_vgnames, read_flags, failed_flags, &failed_flags_print);
/* _print_warn_flags(cmd, vg, vg_name, failed_flags_print); */
/* FAILED_PV_DEV_SIZES WARNING: One or more devices used as PVs in VG have changed sizes */
log_warn("WARNING: Processing VG %s with failed flags 0x%llx.", vg_name, (unsigned long long)failed_flags_print);
}
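Taken together, the hunks above give vg_read() callers a two-tier error model: a NULL return or FAILED_ERROR is fatal and the VG is skipped, while any other failed bits only produce a warning and processing continues. Below is a standalone sketch of that calling pattern using stand-in types and flag values; it paraphrases the loop above and adds no lvm API.

#include <stdio.h>
#include <stdint.h>

#define FLAG_ERROR    0x1ULL	/* stand-in for FAILED_ERROR */
#define FLAG_DEV_SIZE 0x2ULL	/* stand-in for a non-fatal condition */

struct vg_stub { const char *name; };

/* Pretend read: succeeds but reports a non-fatal condition. */
static struct vg_stub *read_vg(const char *name, uint64_t *failed_flags)
{
	static struct vg_stub vg;

	vg.name = name;
	*failed_flags = FLAG_DEV_SIZE;
	return &vg;
}

int main(void)
{
	uint64_t failed_flags = 0;
	struct vg_stub *vg = read_vg("vg0", &failed_flags);

	if (!vg || (failed_flags & FLAG_ERROR)) {
		fprintf(stderr, "Cannot use VG vg0.\n");
		return 1;
	}

	if (failed_flags)
		fprintf(stderr, "WARNING: Processing VG %s with failed flags 0x%llx.\n",
			vg->name, (unsigned long long)failed_flags);

	/* ... process the usable VG here ... */
	return 0;
}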
/* Process this VG? */
if ((process_all ||
@@ -1969,6 +2105,12 @@ static int _process_vgnameid_list(struct cmd_context *cmd, uint32_t read_flags,
log_very_verbose("Process single VG %s", vg_name);
/*
* FIXME: pass failed_flags to single function so that
* each command can decide what to do about any non-fatal
* issues that still exist.
*/
ret = process_single_vg(cmd, vg_name, vg, handle);
_update_selection_result(handle, &whole_selected);
if (ret != ECMD_PROCESSED)
@@ -1977,11 +2119,11 @@ static int _process_vgnameid_list(struct cmd_context *cmd, uint32_t read_flags,
if (ret > ret_max)
ret_max = ret;
}
if (!vg_read_error(vg) && !already_locked)
unlock_vg(cmd, vg, vg_name);
endvg:
release_vg(vg);
if (vg) {
unlock_vg(cmd, vg, vg_name);
release_vg(vg);
}
if (!lockd_vg(cmd, vg_name, "un", 0, &lockd_state))
stack;
@@ -2216,14 +2358,10 @@ int process_each_vg(struct cmd_context *cmd,
}
/*
* First rescan for available devices, then force the next
* label scan to be done. get_vgnameids() will scan labels
* (when not using lvmetad).
* Scan all devices to populate lvmcache with initial
* list of PVs and VGs.
*/
if (cmd->cname->flags & REQUIRES_FULL_LABEL_SCAN) {
dev_cache_full_scan(cmd->full_filter);
lvmcache_force_next_label_scan();
}
lvmcache_label_scan(cmd);
/*
* A list of all VGs on the system is needed when:
@@ -3571,15 +3709,14 @@ static int _process_lv_vgnameid_list(struct cmd_context *cmd, uint32_t read_flag
struct dm_list *tags_arg;
struct dm_list lvnames;
uint32_t lockd_state = 0;
uint64_t failed_flags;
uint64_t failed_flags_print;
const char *vg_name;
const char *vg_uuid;
const char *vgn;
const char *lvn;
int ret_max = ECMD_PROCESSED;
int ret;
int skip;
int notfound;
int already_locked;
int do_report_ret_code = 1;
log_set_report_object_type(LOG_REPORT_OBJECT_TYPE_VG);
@@ -3587,8 +3724,6 @@ static int _process_lv_vgnameid_list(struct cmd_context *cmd, uint32_t read_flag
dm_list_iterate_items(vgnl, vgnameids_to_process) {
vg_name = vgnl->vg_name;
vg_uuid = vgnl->vgid;
skip = 0;
notfound = 0;
uuid[0] = '\0';
if (vg_uuid && !id_write_format((const struct id*)vg_uuid, uuid, sizeof(uuid)))
@@ -3640,17 +3775,29 @@ static int _process_lv_vgnameid_list(struct cmd_context *cmd, uint32_t read_flag
continue;
}
already_locked = lvmcache_vgname_is_locked(vg_name);
failed_flags = 0;
failed_flags_print = 0;
vg = vg_read(cmd, vg_name, vg_uuid, read_flags, lockd_state);
if (_ignore_vg(vg, vg_name, arg_vgnames, read_flags, &skip, &notfound)) {
vg = vg_read(cmd, vg_name, vg_uuid, read_flags, lockd_state, &failed_flags);
if (!vg || (failed_flags & FAILED_ERROR)) {
if (!_suppress_failed_flags(vg, vg_name, arg_vgnames, read_flags, failed_flags, &failed_flags_print)) {
_print_failed_flags(cmd, vg, vg_name, failed_flags_print);
ret_max = ECMD_FAILED;
report_log_ret_code(ret_max);
}
stack;
ret_max = ECMD_FAILED;
report_log_ret_code(ret_max);
goto endvg;
}
if (skip || notfound)
goto endvg;
/*
* The VG can be used when failed_flags do not include ERROR.
* TODO: in what cases do we want to warn about failed_flags
* that are set?
*/
if (failed_flags) {
_suppress_failed_flags(vg, vg_name, arg_vgnames, read_flags, failed_flags, &failed_flags_print);
log_warn("WARNING: Processing VG %s with failed flags %llx.", vg_name, (unsigned long long)failed_flags_print);
}
ret = process_each_lv_in_vg(cmd, vg, &lvnames, tags_arg, 0,
handle, check_single_lv, process_single_lv);
@@ -3659,13 +3806,14 @@ static int _process_lv_vgnameid_list(struct cmd_context *cmd, uint32_t read_flag
report_log_ret_code(ret);
if (ret > ret_max)
ret_max = ret;
if (!already_locked)
unlock_vg(cmd, vg, vg_name);
endvg:
release_vg(vg);
if (vg) {
unlock_vg(cmd, vg, vg_name);
release_vg(vg);
}
if (!lockd_vg(cmd, vg_name, "un", 0, &lockd_state))
stack;
log_set_report_object_name_and_id(NULL, NULL);
}
do_report_ret_code = 0;
@@ -3758,6 +3906,12 @@ int process_each_lv(struct cmd_context *cmd,
goto_out;
}
/*
* Scan all devices to populate lvmcache with initial
* list of PVs and VGs.
*/
lvmcache_label_scan(cmd);
/*
* A list of all VGs on the system is needed when:
* . processing all VGs on the system
@@ -3917,51 +4071,6 @@ out:
return r;
}
static int _device_list_remove(struct dm_list *devices, struct device *dev)
{
struct device_id_list *dil;
dm_list_iterate_items(dil, devices) {
if (dil->dev == dev) {
dm_list_del(&dil->list);
return 1;
}
}
return 0;
}
static struct device_id_list *_device_list_find_dev(struct dm_list *devices, struct device *dev)
{
struct device_id_list *dil;
dm_list_iterate_items(dil, devices) {
if (dil->dev == dev)
return dil;
}
return NULL;
}
static int _device_list_copy(struct cmd_context *cmd, struct dm_list *src, struct dm_list *dst)
{
struct device_id_list *dil;
struct device_id_list *dil_new;
dm_list_iterate_items(dil, src) {
if (!(dil_new = dm_pool_alloc(cmd->mem, sizeof(*dil_new)))) {
log_error("device_id_list alloc failed.");
return ECMD_FAILED;
}
dil_new->dev = dil->dev;
strncpy(dil_new->pvid, dil->pvid, ID_LEN);
dm_list_add(dst, &dil_new->list);
}
return ECMD_PROCESSED;
}
/*
* For each device in arg_devices or all_devices that has a pvid, add a copy of
* that device to arg_missed. All PVs (devices with a pvid) should have been
@@ -4078,13 +4187,13 @@ static int _process_duplicate_pvs(struct cmd_context *cmd,
dm_list_iterate_items(devl, &unused_duplicate_devs) {
/* Duplicates are displayed if -a is used or the dev is named as an arg. */
_device_list_remove(all_devices, devl->dev);
device_list_remove(all_devices, devl->dev);
if (!process_all_devices && dm_list_empty(arg_devices))
continue;
if ((dil = _device_list_find_dev(arg_devices, devl->dev)))
_device_list_remove(arg_devices, devl->dev);
if ((dil = device_list_find_dev(arg_devices, devl->dev)))
device_list_remove(arg_devices, devl->dev);
if (!process_all_devices && !dil)
continue;
@@ -4142,6 +4251,62 @@ static int _process_duplicate_pvs(struct cmd_context *cmd,
return ECMD_PROCESSED;
}
/*
* FIXME: add a new letter for defective devs to show in pv_attr and
* a new reporting field pvs -o defective.
*/
static int _process_defective_pvs(struct cmd_context *cmd,
struct dm_list *all_devices,
struct dm_list *arg_devices,
int process_all_devices,
struct processing_handle *handle,
process_single_pv_fn_t process_single_pv)
{
struct physical_volume pv_dummy;
struct physical_volume *pv;
struct device_id_list *dil;
struct device_list *devl;
struct dm_list defective_devs;
int ret_max = ECMD_PROCESSED;
int ret = 0;
dm_list_init(&defective_devs);
if (!lvmcache_get_defective_devs(cmd, &defective_devs))
return_ECMD_FAILED;
dm_list_iterate_items(devl, &defective_devs) {
device_list_remove(all_devices, devl->dev);
if ((dil = device_list_find_dev(arg_devices, devl->dev)))
device_list_remove(arg_devices, devl->dev);
if (!(cmd->cname->flags & ENABLE_DEFECTIVE_DEVS))
continue;
log_very_verbose("Processing defective device %s.", dev_name(devl->dev));
memset(&pv_dummy, 0, sizeof(pv_dummy));
dm_list_init(&pv_dummy.tags);
dm_list_init(&pv_dummy.segments);
pv_dummy.dev = devl->dev;
pv_dummy.fmt = cmd->fmt;
pv = &pv_dummy;
ret = process_single_pv(cmd, NULL, pv, handle);
if (ret > ret_max)
ret_max = ret;
if (sigint_caught())
return_ECMD_FAILED;
}
return ECMD_PROCESSED;
}
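The defective path deliberately never builds a real PV: the metadata on these devices could not be trusted, so a zeroed struct physical_volume on the stack carries just the device and format, which is enough for a reporting callback. The following is a minimal standalone sketch of that dummy-object-plus-callback shape with stand-in types; the real code uses struct physical_volume and process_single_pv_fn_t as shown above.

#include <stdio.h>
#include <string.h>

struct dev_stub { const char *name; };
struct pv_stub  { struct dev_stub *dev; const char *fmt; };

typedef int (*single_pv_fn)(struct pv_stub *pv);

static int report_pv(struct pv_stub *pv)
{
	/* Only fields filled in by the caller are safe to touch. */
	printf("defective device %s (format %s)\n", pv->dev->name, pv->fmt);
	return 0;
}

static int process_defective(struct dev_stub *devs, int count, single_pv_fn fn)
{
	struct pv_stub pv_dummy;
	int i;

	for (i = 0; i < count; i++) {
		memset(&pv_dummy, 0, sizeof(pv_dummy));	/* no metadata is trusted */
		pv_dummy.dev = &devs[i];
		pv_dummy.fmt = "lvm2";
		if (fn(&pv_dummy))
			return 1;
	}
	return 0;
}

int main(void)
{
	struct dev_stub devs[] = { { "/dev/sdb" }, { "/dev/sdc" } };

	return process_defective(devs, 2, report_pv);
}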
static int _process_pvs_in_vg(struct cmd_context *cmd,
struct volume_group *vg,
struct dm_list *all_devices,
@@ -4205,9 +4370,9 @@ static int _process_pvs_in_vg(struct cmd_context *cmd,
/* Remove each arg_devices entry as it is processed. */
if (!process_pv && !dm_list_empty(arg_devices) &&
(dil = _device_list_find_dev(arg_devices, pv->dev))) {
(dil = device_list_find_dev(arg_devices, pv->dev))) {
process_pv = 1;
_device_list_remove(arg_devices, dil->dev);
device_list_remove(arg_devices, dil->dev);
}
if (!process_pv && !dm_list_empty(arg_tags) &&
@@ -4222,7 +4387,7 @@ static int _process_pvs_in_vg(struct cmd_context *cmd,
else
log_very_verbose("Processing PV %s in VG %s.", pv_name, vg->name);
_device_list_remove(all_devices, pv->dev);
device_list_remove(all_devices, pv->dev);
/*
* pv->dev should be found in all_devices unless it's a
@@ -4292,11 +4457,11 @@ static int _process_pvs_in_vgs(struct cmd_context *cmd, uint32_t read_flags,
const char *vg_name;
const char *vg_uuid;
uint32_t lockd_state = 0;
uint64_t failed_flags;
uint64_t failed_flags_print;
int ret_max = ECMD_PROCESSED;
int ret;
int skip;
int notfound;
int already_locked;
int do_report_ret_code = 1;
log_set_report_object_type(LOG_REPORT_OBJECT_TYPE_VG);
@@ -4305,7 +4470,6 @@ static int _process_pvs_in_vgs(struct cmd_context *cmd, uint32_t read_flags,
vg_name = vgnl->vg_name;
vg_uuid = vgnl->vgid;
skip = 0;
notfound = 0;
uuid[0] = '\0';
if (is_orphan_vg(vg_name)) {
@@ -4330,25 +4494,42 @@ static int _process_pvs_in_vgs(struct cmd_context *cmd, uint32_t read_flags,
log_debug("Processing PVs in VG %s", vg_name);
already_locked = lvmcache_vgname_is_locked(vg_name);
failed_flags = 0;
failed_flags_print = 0;
vg = vg_read(cmd, vg_name, vg_uuid, read_flags, lockd_state);
if (_ignore_vg(vg, vg_name, NULL, read_flags, &skip, &notfound)) {
vg = vg_read(cmd, vg_name, vg_uuid, read_flags, lockd_state, &failed_flags);
if (!vg || (failed_flags & FAILED_ERROR)) {
if (!_suppress_failed_flags(vg, vg_name, NULL, read_flags, failed_flags, &failed_flags_print)) {
_print_failed_flags(cmd, vg, vg_name, failed_flags_print);
ret_max = ECMD_FAILED;
report_log_ret_code(ret_max);
}
stack;
ret_max = ECMD_FAILED;
report_log_ret_code(ret_max);
/*
* FIXME: can we just do this instead of going
* through the processing with the skip flag?
* remove_pv_list_from_device_list(&vg->pvs, arg_devices);
* remove_pv_list_from_device_list(&vg->pvs, all_devices);
*/
if (vg) {
/* Drop through to eliminate the VG's PVs from the devices list */
skip = 1;
goto process;
} else {
goto endvg;
}
}
if (notfound)
goto endvg;
/*
* The VG can be used when failed_flags do not include ERROR.
* TODO: in what cases do we want to warn about failed_flags
* that are set?
*/
if (failed_flags) {
_suppress_failed_flags(vg, vg_name, NULL, read_flags, failed_flags, &failed_flags_print);
log_warn("WARNING: Processing VG %s with failed flags 0x%llx.", vg_name, (unsigned long long)failed_flags_print);
}
process:
ret = _process_pvs_in_vg(cmd, vg, all_devices, arg_devices, arg_tags,
process_all_pvs, process_all_devices, skip,
handle, process_single_pv);
@@ -4357,11 +4538,12 @@ static int _process_pvs_in_vgs(struct cmd_context *cmd, uint32_t read_flags,
report_log_ret_code(ret);
if (ret > ret_max)
ret_max = ret;
if (!skip && !already_locked)
unlock_vg(cmd, vg, vg->name);
endvg:
release_vg(vg);
if (vg) {
unlock_vg(cmd, vg, vg->name);
release_vg(vg);
}
if (!lockd_vg(cmd, vg_name, "un", 0, &lockd_state))
stack;
@@ -4412,9 +4594,7 @@ int process_each_pv(struct cmd_context *cmd,
* if it was removed between creating the list of all VGs and then
* processing each VG.
*/
if (only_this_vgname)
read_flags |= READ_WARN_INCONSISTENT;
else
if (!only_this_vgname)
read_flags |= READ_OK_NOTFOUND;
/* Disable error in vg_read so we can print it from ignore_vg. */
@@ -4467,7 +4647,12 @@ int process_each_pv(struct cmd_context *cmd,
if (!trust_cache() && !orphans_locked) {
log_debug("Scanning for available devices");
lvmcache_destroy(cmd, 1, 0);
dev_cache_full_scan(cmd->full_filter);
/*
* Scan all devices to populate lvmcache with initial
* list of PVs and VGs.
*/
lvmcache_label_scan(cmd);
}
if (!get_vgnameids(cmd, &all_vgnameids, only_this_vgname, 1)) {
@@ -4538,6 +4723,13 @@ int process_each_pv(struct cmd_context *cmd,
if (ret > ret_max)
ret_max = ret;
ret = _process_defective_pvs(cmd, &all_devices, &arg_devices, process_all_devices,
handle, process_single_pv);
if (ret != ECMD_PROCESSED)
stack;
if (ret > ret_max)
ret_max = ret;
/*
* If the orphans lock was held, there shouldn't be missed devices. If
* there were, we cannot clear the cache while holding the orphans lock
@@ -4566,7 +4758,7 @@ int process_each_pv(struct cmd_context *cmd,
struct dm_list arg_missed_orig;
dm_list_init(&arg_missed_orig);
_device_list_copy(cmd, &arg_missed, &arg_missed_orig);
device_list_copy(cmd, &arg_missed, &arg_missed_orig);
log_verbose("Some PVs were not found in first search, retrying.");
@@ -4588,8 +4780,8 @@ int process_each_pv(struct cmd_context *cmd,
/* Devices removed from arg_missed are removed from arg_devices. */
dm_list_iterate_items(dil, &arg_missed_orig) {
if (!_device_list_find_dev(&arg_missed, dil->dev))
_device_list_remove(&arg_devices, dil->dev);
if (!device_list_find_dev(&arg_missed, dil->dev))
device_list_remove(&arg_devices, dil->dev);
}
}
@@ -5419,7 +5611,6 @@ int pvcreate_each_device(struct cmd_context *cmd,
struct pv_list *pvl;
struct pv_list *vgpvl;
const char *pv_name;
int consistent = 0;
int must_use_all = (cmd->cname->flags & MUST_USE_ALL_ARGS);
int found;
unsigned i;
@@ -5481,6 +5672,8 @@ int pvcreate_each_device(struct cmd_context *cmd,
dev_cache_full_scan(cmd->full_filter);
lvmcache_label_scan(cmd);
/*
* Translate arg names into struct device's.
*/
@@ -5635,6 +5828,8 @@ int pvcreate_each_device(struct cmd_context *cmd,
goto out;
}
lvmcache_label_scan(cmd);
/*
* The device args began on the arg_devices list, then the first check
* loop moved those entries to arg_process as they were found. Devices
@@ -5707,9 +5902,11 @@ do_command:
* and not recreate a new PV on top of an existing PV.
*/
if (pp->preserve_existing && pp->orphan_vg_name) {
uint64_t failed_flags = 0;
log_debug("Using existing orphan PVs in %s.", pp->orphan_vg_name);
if (!(orphan_vg = vg_read_internal(cmd, pp->orphan_vg_name, NULL, 0, &consistent))) {
if (!(orphan_vg = vg_read_internal(cmd, pp->orphan_vg_name, NULL, 0, &failed_flags, NULL, NULL))) {
log_error("Cannot read orphans VG %s.", pp->orphan_vg_name);
goto bad;
}

View File

@@ -113,7 +113,6 @@ struct arg_value_group_list {
uint32_t prio;
};
#define CACHE_VGMETADATA 0x00000001
#define PERMITTED_READ_ONLY 0x00000002
/* Process all VGs if none specified on the command line. */
#define ALL_VGS_IS_DEFAULT 0x00000004
@@ -137,6 +136,8 @@ struct arg_value_group_list {
#define DISALLOW_TAG_ARGS 0x00000800
/* Command may need to find VG name in an option value. */
#define GET_VGNAME_FROM_OPTIONS 0x00001000
/* Command should process defective devices. */
#define ENABLE_DEFECTIVE_DEVS 0x00002000
void usage(const char *name);
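Opting in is just a bit test against cmd->cname->flags, as _process_defective_pvs() does above. Below is a toy sketch of that gating pattern with stand-in values; the real flag table lives in the lvm command definitions and is not reproduced here, and the choice of which command opts in is hypothetical.

#include <stdio.h>
#include <stdint.h>

#define ALL_VGS_IS_DEFAULT_EX    0x00000004U	/* stand-in values */
#define ENABLE_DEFECTIVE_DEVS_EX 0x00002000U

struct cmd_name_stub {
	const char *name;
	uint32_t flags;
};

int main(void)
{
	/* Hypothetical: only the reporting command opts in. */
	struct cmd_name_stub cmds[] = {
		{ "pvs",      ALL_VGS_IS_DEFAULT_EX | ENABLE_DEFECTIVE_DEVS_EX },
		{ "pvresize", ALL_VGS_IS_DEFAULT_EX },
	};
	unsigned i;

	for (i = 0; i < 2; i++) {
		if (!(cmds[i].flags & ENABLE_DEFECTIVE_DEVS_EX)) {
			printf("%s: skipping defective devices\n", cmds[i].name);
			continue;
		}
		printf("%s: processing defective devices\n", cmds[i].name);
	}
	return 0;
}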

View File

@@ -62,12 +62,6 @@ static int _vg_backup_single(struct cmd_context *cmd, const char *vg_name,
if (!backup_to_file(filename, vg->cmd->cmd_line, vg))
return_ECMD_FAILED;
} else {
if (vg_read_error(vg) == FAILED_INCONSISTENT) {
log_error("No backup taken: specify filename with -f "
"to backup an inconsistent VG");
return ECMD_FAILED;
}
/* just use the normal backup code */
backup_enable(cmd, 1); /* force a backup */
if (!backup(vg))
@@ -94,7 +88,7 @@ int vgcfgbackup(struct cmd_context *cmd, int argc, char **argv)
init_pvmove(1);
ret = process_each_vg(cmd, argc, argv, NULL, NULL, READ_ALLOW_INCONSISTENT, 0,
ret = process_each_vg(cmd, argc, argv, NULL, NULL, READ_NO_REPAIR, 0,
handle, &_vg_backup_single);
dm_free(last_filename);

View File

@@ -74,6 +74,8 @@ int vgcfgrestore(struct cmd_context *cmd, int argc, char **argv)
return ECMD_FAILED;
}
lvmcache_label_scan(cmd);
cmd->handles_unknown_segments = 1;
if (!(arg_is_set(cmd, file_ARG) ?

View File

@@ -26,7 +26,8 @@ int vgcreate(struct cmd_context *cmd, int argc, char **argv)
const char *clustered_message = "";
char *vg_name;
struct arg_value_group_list *current_group;
uint32_t rc;
int lock_failed = 0;
int name_exists = 0;
if (!argc) {
log_error("Please provide volume group name and "
@@ -72,8 +73,8 @@ int vgcreate(struct cmd_context *cmd, int argc, char **argv)
* Check if the VG name already exists. This should be done before
* creating PVs on any of the devices.
*/
if ((rc = vg_lock_newname(cmd, vp_new.vg_name)) != SUCCESS) {
if (rc == FAILED_EXIST)
if (!vg_lock_newname(cmd, vp_new.vg_name, &lock_failed, &name_exists)) {
if (name_exists)
log_error("A volume group called %s already exists.", vp_new.vg_name);
else
log_error("Can't get lock for %s.", vp_new.vg_name);

View File

@@ -19,12 +19,11 @@ static struct volume_group *_vgmerge_vg_read(struct cmd_context *cmd,
const char *vg_name)
{
struct volume_group *vg;
log_verbose("Checking for volume group \"%s\"", vg_name);
vg = vg_read_for_update(cmd, vg_name, NULL, 0, 0);
if (vg_read_error(vg)) {
release_vg(vg);
return NULL;
}
if (!(vg = vg_read(cmd, vg_name, NULL, READ_FOR_UPDATE, 0, NULL)))
return_NULL;
if (is_lockd_type(vg->lock_type)) {
log_error("vgmerge not allowed for lock_type %s", vg->lock_type);

View File

@@ -37,8 +37,7 @@ static int _vgremove_single(struct cmd_context *cmd, const char *vg_name,
* Even multiple --yes are equivalent to single --force
* When we require -ff it cannot be replaced with -f -y
*/
force_t force = (force_t) arg_count(cmd, force_ARG)
? : (arg_is_set(cmd, yes_ARG) ? DONT_PROMPT : PROMPT);
force_t force = (force_t) arg_count(cmd, force_ARG) ? : (arg_is_set(cmd, yes_ARG) ? DONT_PROMPT : PROMPT);
unsigned lv_count, missing;
int ret;
@@ -71,8 +70,28 @@ static int _vgremove_single(struct cmd_context *cmd, const char *vg_name,
if (!lockd_free_vg_before(cmd, vg, 0))
return_ECMD_FAILED;
if (!force && !vg_remove_check(vg))
return_ECMD_FAILED;
if (!force) {
/*
* FIXME: are there other things we need to check which were
* allowed in vg_read because of handles_missing_pvs, but we
* don't want to handle without force?
*/
if (vg_missing_pv_count(vg))
return_ECMD_FAILED;
if (!vg_check_status(vg, EXPORTED_VG))
return_ECMD_FAILED;
if ((lv_count = vg_visible_lvs(vg))) {
log_error("Volume group \"%s\" still contains %u logical volume(s)",
vg->name, lv_count);
return_ECMD_FAILED;
}
if (!archive(vg))
return_ECMD_FAILED;
}
vg_remove_pvs(vg);

View File

@@ -464,11 +464,14 @@ static struct volume_group *_vgsplit_to(struct cmd_context *cmd,
int *existing_vg)
{
struct volume_group *vg_to = NULL;
int lock_failed = 0;
int name_exists = 0;
log_verbose("Checking for new volume group \"%s\"", vg_name_to);
/*
* First try to create a new VG. If we cannot create it,
* and we get FAILED_EXIST (we will not be holding a lock),
* First try to create a new VG. If we cannot create it
* and it already exists (we will not be holding a lock),
* a VG must already exist with this name. We then try to
* read the existing VG - the vgsplit will be into an existing VG.
*
@@ -476,26 +479,28 @@ static struct volume_group *_vgsplit_to(struct cmd_context *cmd,
* we obtained a WRITE lock and could not find the vgname in the
* system. Thus, the split will be into a new VG.
*/
vg_to = vg_lock_and_create(cmd, vg_name_to);
if (vg_read_error(vg_to) == FAILED_LOCKING) {
vg_to = vg_lock_and_create(cmd, vg_name_to, &lock_failed, &name_exists);
if (vg_to)
return vg_to;
if (lock_failed) {
log_error("Can't get lock for %s", vg_name_to);
release_vg(vg_to);
return NULL;
}
if (vg_read_error(vg_to) == FAILED_EXIST) {
if (name_exists) {
*existing_vg = 1;
release_vg(vg_to);
vg_to = vg_read_for_update(cmd, vg_name_to, NULL, 0, 0);
if (vg_read_error(vg_to)) {
release_vg(vg_to);
if (!(vg_to = vg_read(cmd, vg_name_to, NULL, READ_NO_LOCK | READ_FOR_UPDATE, 0, NULL)))
return_NULL;
}
} else if (vg_read_error(vg_to) == SUCCESS) {
*existing_vg = 0;
return vg_to;
}
return vg_to;
/* shouldn't happen */
log_error("Failed to lock or create VG %s.", vg_name_to);
return_NULL;
}
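Because the flattened view above interleaves the old and new versions of _vgsplit_to(), the intended control flow is easier to see condensed. The following is a standalone sketch with stub stand-ins for vg_lock_and_create() and vg_read(); the flow mirrors the new code in this hunk, error paths are abbreviated, and nothing here is additional lvm API.

#include <stdio.h>
#include <string.h>

struct vg_stub { char name[32]; };

/* Stub standing in for vg_lock_and_create(): fails with name_exists set
 * when the name is already taken. */
static struct vg_stub *try_create(const char *name, int *lock_failed, int *name_exists)
{
	static struct vg_stub vg;

	*lock_failed = 0;
	*name_exists = !strcmp(name, "vg_exists");	/* pretend this one is taken */
	if (*name_exists)
		return NULL;
	snprintf(vg.name, sizeof(vg.name), "%s", name);
	return &vg;
}

/* Stub standing in for reading the existing VG for update. */
static struct vg_stub *read_existing(const char *name)
{
	static struct vg_stub vg;

	snprintf(vg.name, sizeof(vg.name), "%s", name);
	return &vg;
}

/* Create-or-read-existing, mirroring the new _vgsplit_to() flow. */
static struct vg_stub *split_to(const char *name, int *existing)
{
	int lock_failed = 0, name_exists = 0;
	struct vg_stub *vg = try_create(name, &lock_failed, &name_exists);

	if (vg) {				/* brand new VG, lock held */
		*existing = 0;
		return vg;
	}
	if (lock_failed) {
		fprintf(stderr, "Can't get lock for %s\n", name);
		return NULL;
	}
	if (name_exists) {			/* split into an existing VG */
		*existing = 1;
		return read_existing(name);
	}
	fprintf(stderr, "Failed to lock or create VG %s.\n", name);
	return NULL;				/* shouldn't happen */
}

int main(void)
{
	int existing = 0;
	struct vg_stub *vg = split_to("vg_exists", &existing);

	if (vg)
		printf("using %s VG %s\n", existing ? "existing" : "new", vg->name);
	return 0;
}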
/*
@@ -511,11 +516,8 @@ static struct volume_group *_vgsplit_from(struct cmd_context *cmd,
log_verbose("Checking for volume group \"%s\"", vg_name_from);
vg_from = vg_read_for_update(cmd, vg_name_from, NULL, 0, 0);
if (vg_read_error(vg_from)) {
release_vg(vg_from);
return NULL;
}
if (!(vg_from = vg_read(cmd, vg_name_from, NULL, READ_FOR_UPDATE, 0, NULL)))
return_NULL;
if (is_lockd_type(vg_from->lock_type)) {
log_error("vgsplit not allowed for lock_type %s", vg_from->lock_type);
@@ -581,6 +583,11 @@ int vgsplit(struct cmd_context *cmd, int argc, char **argv)
return ECMD_FAILED;
}
if (!validate_name(vg_name_to)) {
log_error("Invalid vg name %s", vg_name_to);
return ECMD_FAILED;
}
if (strcmp(vg_name_to, vg_name_from) < 0)
lock_vg_from_first = 0;
@@ -749,9 +756,8 @@ int vgsplit(struct cmd_context *cmd, int argc, char **argv)
*/
if (!test_mode()) {
release_vg(vg_to);
vg_to = vg_read_for_update(cmd, vg_name_to, NULL,
READ_ALLOW_EXPORTED, 0);
if (vg_read_error(vg_to)) {
if (!(vg_to = vg_read(cmd, vg_name_to, NULL, READ_FOR_UPDATE | READ_ALLOW_EXPORTED, 0, NULL))) {
/* FIXME: this inconsistent message is not necessarily true. */
log_error("Volume group \"%s\" became inconsistent: "
"please fix manually", vg_name_to);
goto bad;