Merge branch 'master' into 2018-05-11-fork-libdm

2025-03-24 14:50:34 +03:00 · 2018-05-16 13:43:02 +01:00 · 2018-05-16 13:43:02 +01:00 · 89fdc0b588
commit 89fdc0b588
parent ccc35e2647 7c852c75c3
60 changed files with 645 additions and 336 deletions
--- a/Makefile.in
+++ b/Makefile.in
@ -1,6 +1,6 @@
 #
 # Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
-# Copyright (C) 2004-2015 Red Hat, Inc. All rights reserved.
+# Copyright (C) 2004-2018 Red Hat, Inc. All rights reserved.
 #
 # This file is part of LVM2.
 #
@ -61,6 +61,7 @@ po: tools daemons
 man: tools
 all_man: tools
 scripts: liblvm libdm
+test: tools daemons

 lib.device-mapper: include.device-mapper
 libdm.device-mapper: include.device-mapper
@ -96,7 +97,7 @@ endif
 DISTCLEAN_TARGETS += cscope.out
 CLEAN_DIRS += autom4te.cache

-check check_system check_cluster check_local check_lvmetad check_lvmpolld check_lvmlockd_test check_lvmlockd_dlm check_lvmlockd_sanlock: all
+check check_system check_cluster check_local check_lvmetad check_lvmpolld check_lvmlockd_test check_lvmlockd_dlm check_lvmlockd_sanlock: test
 	$(MAKE) -C test $(@)

 conf.generate man.generate: tools
@ -145,7 +146,7 @@ install_system_dirs:
 	$(INSTALL_ROOT_DIR) $(DESTDIR)$(DEFAULT_RUN_DIR)
 	$(INSTALL_ROOT_DATA) /dev/null $(DESTDIR)$(DEFAULT_CACHE_DIR)/.cache

-install_initscripts: 
+install_initscripts:
 	$(MAKE) -C scripts install_initscripts

 install_systemd_generators:
@ -168,6 +169,7 @@ install_tmpfiles_configuration:

 LCOV_TRACES = libdm.info lib.info liblvm.info tools.info \
 	libdaemon/client.info libdaemon/server.info \
+	test/unit.info \
 	daemons/clvmd.info \
 	daemons/dmeventd.info \
 	daemons/lvmetad.info \
--- a/4
+++ b/4
@ -1,5 +1,9 @@
 Version 2.02.178 - 
 =====================================
+  Remove lvm1 and pool format handling and add filter to ignore them.
+  Move some filter checks to after disks are read.
+  Rework disk scanning and when it is used.
+  Add new io layer and shift code to using it.
  lvconvert: don't return success on degraded -m raid1 conversion
  --enable-testing switch for ./configure has been removed.
  --with-snapshots switch for ./configure has been removed.
--- a/conf/example.conf.in
+++ b/conf/example.conf.in
@ -702,29 +702,17 @@ global {
 	activation = 1

 	# Configuration option global/fallback_to_lvm1.
-	# Try running LVM1 tools if LVM cannot communicate with DM.
-	# This option only applies to 2.4 kernels and is provided to help
-	# switch between device-mapper kernels and LVM1 kernels. The LVM1
-	# tools need to be installed with .lvm1 suffices, e.g. vgscan.lvm1.
-	# They will stop working once the lvm2 on-disk metadata format is used.
+	# This setting is no longer used.
 	# This configuration option has an automatic default value.
-	# fallback_to_lvm1 = @DEFAULT_FALLBACK_TO_LVM1@
+	# fallback_to_lvm1 = 0

 	# Configuration option global/format.
-	# The default metadata format that commands should use.
-	# The -M 1|2 option overrides this setting.
-	# 
-	# Accepted values:
-	#   lvm1
-	#   lvm2
-	# 
+	# This setting is no longer used.
 	# This configuration option has an automatic default value.
 	# format = "lvm2"

 	# Configuration option global/format_libraries.
-	# Shared libraries that process different metadata formats.
-	# If support for LVM1 metadata was compiled as a shared library use
-	# format_libraries = "liblvm2format1.so"
+	# This setting is no longer used.
 	# This configuration option does not have a default value defined.

 	# Configuration option global/segment_libraries.
@ -821,13 +809,6 @@ global {
 	# encountered the internal error. Please only enable for debugging.
 	abort_on_internal_errors = 0

-	# Configuration option global/detect_internal_vg_cache_corruption.
-	# Internal verification of VG structures.
-	# Check if CRC matches when a parsed VG is used multiple times. This
-	# is useful to catch unexpected changes to cached VG structures.
-	# Please only enable for debugging.
-	detect_internal_vg_cache_corruption = 0
-
 	# Configuration option global/metadata_read_only.
 	# No operations that change on-disk metadata are permitted.
 	# Additionally, read-only commands that encounter metadata in need of
--- a/5
+++ b/5
@ -6141,7 +6141,7 @@ fi


 for ac_header in assert.h ctype.h dirent.h errno.h fcntl.h float.h \
-  getopt.h inttypes.h langinfo.h libgen.h limits.h locale.h paths.h \
+  getopt.h inttypes.h langinfo.h libaio.h libgen.h limits.h locale.h paths.h \
  signal.h stdarg.h stddef.h stdio.h stdlib.h string.h sys/file.h \
  sys/ioctl.h syslog.h sys/mman.h sys/param.h sys/resource.h sys/stat.h \
  sys/time.h sys/types.h sys/utsname.h sys/wait.h time.h \
@ -15559,7 +15559,7 @@ _ACEOF


 ################################################################################
-ac_config_files="$ac_config_files Makefile make.tmpl libdm/make.tmpl daemons/Makefile daemons/clvmd/Makefile daemons/cmirrord/Makefile daemons/dmeventd/Makefile daemons/dmeventd/libdevmapper-event.pc daemons/dmeventd/plugins/Makefile daemons/dmeventd/plugins/lvm2/Makefile daemons/dmeventd/plugins/raid/Makefile daemons/dmeventd/plugins/mirror/Makefile daemons/dmeventd/plugins/snapshot/Makefile daemons/dmeventd/plugins/thin/Makefile daemons/dmfilemapd/Makefile daemons/lvmdbusd/Makefile daemons/lvmdbusd/lvmdbusd daemons/lvmdbusd/lvmdb.py daemons/lvmdbusd/lvm_shell_proxy.py daemons/lvmdbusd/path.py daemons/lvmetad/Makefile daemons/lvmpolld/Makefile daemons/lvmlockd/Makefile conf/Makefile conf/example.conf conf/lvmlocal.conf conf/command_profile_template.profile conf/metadata_profile_template.profile include/Makefile lib/Makefile lib/locking/Makefile include/lvm-version.h libdaemon/Makefile libdaemon/client/Makefile libdaemon/server/Makefile libdm/Makefile libdm/libdevmapper.pc liblvm/Makefile liblvm/liblvm2app.pc man/Makefile po/Makefile python/Makefile python/setup.py scripts/blkdeactivate.sh scripts/blk_availability_init_red_hat scripts/blk_availability_systemd_red_hat.service scripts/clvmd_init_red_hat scripts/cmirrord_init_red_hat scripts/com.redhat.lvmdbus1.service scripts/dm_event_systemd_red_hat.service scripts/dm_event_systemd_red_hat.socket scripts/lvm2_cluster_activation_red_hat.sh scripts/lvm2_cluster_activation_systemd_red_hat.service scripts/lvm2_clvmd_systemd_red_hat.service scripts/lvm2_cmirrord_systemd_red_hat.service scripts/lvm2_lvmdbusd_systemd_red_hat.service scripts/lvm2_lvmetad_init_red_hat scripts/lvm2_lvmetad_systemd_red_hat.service scripts/lvm2_lvmetad_systemd_red_hat.socket scripts/lvm2_lvmpolld_init_red_hat scripts/lvm2_lvmpolld_systemd_red_hat.service scripts/lvm2_lvmpolld_systemd_red_hat.socket scripts/lvm2_lvmlockd_systemd_red_hat.service scripts/lvm2_lvmlocking_systemd_red_hat.service scripts/lvm2_monitoring_init_red_hat 
scripts/lvm2_monitoring_systemd_red_hat.service scripts/lvm2_pvscan_systemd_red_hat@.service scripts/lvm2_tmpfiles_red_hat.conf scripts/lvmdump.sh scripts/Makefile test/Makefile test/api/Makefile test/unit/Makefile tools/Makefile udev/Makefile"
+ac_config_files="$ac_config_files Makefile make.tmpl libdm/make.tmpl daemons/Makefile daemons/clvmd/Makefile daemons/cmirrord/Makefile daemons/dmeventd/Makefile daemons/dmeventd/libdevmapper-event.pc daemons/dmeventd/plugins/Makefile daemons/dmeventd/plugins/lvm2/Makefile daemons/dmeventd/plugins/raid/Makefile daemons/dmeventd/plugins/mirror/Makefile daemons/dmeventd/plugins/snapshot/Makefile daemons/dmeventd/plugins/thin/Makefile daemons/dmfilemapd/Makefile daemons/lvmdbusd/Makefile daemons/lvmdbusd/lvmdbusd daemons/lvmdbusd/lvmdb.py daemons/lvmdbusd/lvm_shell_proxy.py daemons/lvmdbusd/path.py daemons/lvmetad/Makefile daemons/lvmpolld/Makefile daemons/lvmlockd/Makefile conf/Makefile conf/example.conf conf/lvmlocal.conf conf/command_profile_template.profile conf/metadata_profile_template.profile include/Makefile lib/Makefile lib/locking/Makefile include/lvm-version.h libdaemon/Makefile libdaemon/client/Makefile libdaemon/server/Makefile libdm/Makefile libdm/libdevmapper.pc liblvm/Makefile liblvm/liblvm2app.pc man/Makefile po/Makefile python/Makefile python/setup.py scripts/blkdeactivate.sh scripts/blk_availability_init_red_hat scripts/blk_availability_systemd_red_hat.service scripts/clvmd_init_red_hat scripts/cmirrord_init_red_hat scripts/com.redhat.lvmdbus1.service scripts/dm_event_systemd_red_hat.service scripts/dm_event_systemd_red_hat.socket scripts/lvm2_cluster_activation_red_hat.sh scripts/lvm2_cluster_activation_systemd_red_hat.service scripts/lvm2_clvmd_systemd_red_hat.service scripts/lvm2_cmirrord_systemd_red_hat.service scripts/lvm2_lvmdbusd_systemd_red_hat.service scripts/lvm2_lvmetad_init_red_hat scripts/lvm2_lvmetad_systemd_red_hat.service scripts/lvm2_lvmetad_systemd_red_hat.socket scripts/lvm2_lvmpolld_init_red_hat scripts/lvm2_lvmpolld_systemd_red_hat.service scripts/lvm2_lvmpolld_systemd_red_hat.socket scripts/lvm2_lvmlockd_systemd_red_hat.service scripts/lvm2_lvmlocking_systemd_red_hat.service scripts/lvm2_monitoring_init_red_hat 
scripts/lvm2_monitoring_systemd_red_hat.service scripts/lvm2_pvscan_systemd_red_hat@.service scripts/lvm2_tmpfiles_red_hat.conf scripts/lvmdump.sh scripts/Makefile test/Makefile test/api/Makefile test/api/python_lvm_unit.py test/unit/Makefile tools/Makefile udev/Makefile"

 cat >confcache <<\_ACEOF
 # This file is a shell script that caches the results of configure
@ -16326,6 +16326,7 @@ do
    "scripts/Makefile") CONFIG_FILES="$CONFIG_FILES scripts/Makefile" ;;
    "test/Makefile") CONFIG_FILES="$CONFIG_FILES test/Makefile" ;;
    "test/api/Makefile") CONFIG_FILES="$CONFIG_FILES test/api/Makefile" ;;
+    "test/api/python_lvm_unit.py") CONFIG_FILES="$CONFIG_FILES test/api/python_lvm_unit.py" ;;
    "test/unit/Makefile") CONFIG_FILES="$CONFIG_FILES test/unit/Makefile" ;;
    "tools/Makefile") CONFIG_FILES="$CONFIG_FILES tools/Makefile" ;;
    "udev/Makefile") CONFIG_FILES="$CONFIG_FILES udev/Makefile" ;;
--- a/configure.ac
+++ b/configure.ac
@ -103,7 +103,7 @@ AC_HEADER_SYS_WAIT
 AC_HEADER_TIME

 AC_CHECK_HEADERS([assert.h ctype.h dirent.h errno.h fcntl.h float.h \
-  getopt.h inttypes.h langinfo.h libgen.h limits.h locale.h paths.h \
+  getopt.h inttypes.h langinfo.h libaio.h libgen.h limits.h locale.h paths.h \
  signal.h stdarg.h stddef.h stdio.h stdlib.h string.h sys/file.h \
  sys/ioctl.h syslog.h sys/mman.h sys/param.h sys/resource.h sys/stat.h \
  sys/time.h sys/types.h sys/utsname.h sys/wait.h time.h \
@ -2158,6 +2158,7 @@ scripts/lvmdump.sh
 scripts/Makefile
 test/Makefile
 test/api/Makefile
+test/api/python_lvm_unit.py
 test/unit/Makefile
 tools/Makefile
 udev/Makefile
--- a/daemons/lvmlockd/Makefile.in
+++ b/daemons/lvmlockd/Makefile.in
@ -27,6 +27,8 @@ ifeq ("@BUILD_LOCKDDLM@", "yes")
  LOCK_LIBS += -ldlm_lt
 endif

+SOURCES2 = lvmlockctl.c
+
 TARGETS = lvmlockd lvmlockctl

 .PHONY: install_lvmlockd
--- a/doc/lvm-disk-reading.txt
+++ b/doc/lvm-disk-reading.txt
@ -229,3 +229,110 @@ It may be worthwhile to change the filters to use the udev info as a hint,
 or only use udev info for filtering in reporting commands where
 inaccuracies are not a big problem.)

+
+
+I/O Performance
+---------------
+
+. 400 loop devices used as PVs
+. 40 VGs each with 10 PVs
+. each VG has one active LV
+. each of the 10 PVs in vg0 has an artificial 100 ms read delay
+. read/write/io_submit are system call counts using strace
+. old is lvm 2.02.175
+. new is lvm 2.02.178 (shortly before release)
+
+
+Command: pvs
+------------
+old: 0m17.422s
+new: 0m0.331s
+
+old: read 7773 write 497
+new: read 2807 write 495 io_submit 448
+
+
+Command: vgs
+------------
+old: 0m20.383s
+new: 0m0.325s
+
+old: read 10684 write 129
+new: read  2807 write 129 io_submit 448
+
+
+Command: vgck vg0
+-----------------
+old: 0m16.212s
+new: 0m1.290s
+
+old: read 6372 write 4
+new: read 2807 write 4 io_submit 458
+
+
+Command: lvcreate -n test -l1 -an vg0
+-------------------------------------
+old: 0m29.271s
+new: 0m1.351s
+
+old: read 6503 write 39
+new: read 2808 write 9 io_submit 488
+
+
+Command: lvremove vg0/test
+--------------------------
+old: 0m29.262s
+new: 0m1.348s
+
+old: read 6502 write 36
+new: read 2807 write 6 io_submit 488
+
+
+io_submit sources
+-----------------
+
+vgs:
+  reads:
+  - 400 for each PV
+  - 40 for each LV
+  - 8 for other devs on the system
+
+vgck vg0:
+  reads:
+  - 400 for each PV
+  - 40 for each LV
+  - 10 for each PV in vg0 (rescan)
+  - 8 for other devs on the system
+
+lvcreate -n test -l1 -an vg0
+  reads:
+  - 400 for each PV
+  - 40 for each LV
+  - 10 for each PV in vg0 (rescan)
+  - 8 for other devs on the system
+  writes:
+  - 10 for metadata on each PV in vg0
+  - 10 for precommit on each PV in vg0
+  - 10 for commit on each PV in vg0
+
+
+
+With lvmetad
+------------
+
+Command: pvs
+------------
+old: 0m5.405s
+new: 0m1.404s
+
+Command: vgs
+------------
+old: 0m0.222s
+new: 0m0.223s
+
+Command: lvcreate -n test -l1 -an vg0
+-------------------------------------
+old: 0m10.128s
+new: 0m1.137s
+
+
--- a/include/configure.h.in
+++ b/include/configure.h.in
@ -245,6 +245,9 @@
 /* Define to 1 if you have the <langinfo.h> header file. */
 #undef HAVE_LANGINFO_H

+/* Define to 1 if you have the <libaio.h> header file. */
+#undef HAVE_LIBAIO_H
+
 /* Define to 1 if you have the <libcman.h> header file. */
 #undef HAVE_LIBCMAN_H

--- a/lib/cache/lvmcache.c
+++ b/lib/cache/lvmcache.c
@ -643,7 +643,6 @@ void lvmcache_unlock_vgname(const char *vgname)

 	/* FIXME Do this per-VG */
 	if (strcmp(vgname, VG_GLOBAL) && !--_vgs_locked) {
-		dev_close_all();
 		dev_size_seqno_inc(); /* invalidate all cached dev sizes */
 	}
 }
@ -811,7 +810,7 @@ const struct format_type *lvmcache_fmt_from_vgname(struct cmd_context *cmd,

 	dm_list_iterate_safe(devh, tmp, &devs) {
 		devl = dm_list_item(devh, struct device_list);
-		label_read(devl->dev, NULL, UINT64_C(0));
+		label_read(devl->dev);
 		dm_list_del(&devl->list);
 		dm_free(devl);
 	}
@ -977,6 +976,36 @@ int lvmcache_dev_is_unchosen_duplicate(struct device *dev)
 	return _dev_in_device_list(dev, &_unused_duplicate_devs);
 }

+/*
+ * Treat some duplicate devs as if they were filtered out by filters.
+ * The actual filters are evaluated too early, before a complete
+ * picture of all PVs is available, to eliminate these duplicates.
+ *
+ * By removing the filtered duplicates from unused_duplicate_devs, we remove
+ * the restrictions that are placed on using duplicate devs or VGs with
+ * duplicate devs.
+ *
+ * There may be other kinds of duplicates that we want to ignore.
+ */
+
+static void _filter_duplicate_devs(struct cmd_context *cmd)
+{
+	struct dev_types *dt = cmd->dev_types;
+	struct lvmcache_info *info;
+	struct device_list *devl, *devl2;
+
+	dm_list_iterate_items_safe(devl, devl2, &_unused_duplicate_devs) {
+		/* The pvid lookup can return NULL; skip such entries. */
+		if (!(info = lvmcache_info_from_pvid(devl->dev->pvid, NULL, 0)))
+			continue;
+		if (MAJOR(info->dev->dev) == dt->md_major) {
+			log_debug_devs("Ignoring md component duplicate %s", dev_name(devl->dev));
+			dm_list_del(&devl->list);
+			dm_free(devl);
+		}
+	}
+}
+
 /*
 * Compare _found_duplicate_devs entries with the corresponding duplicate dev
 * in lvmcache.  There may be multiple duplicates in _found_duplicate_devs for
@ -1279,9 +1308,9 @@ next:
 int lvmcache_label_rescan_vg(struct cmd_context *cmd, const char *vgname, const char *vgid)
 {
 	struct dm_list devs;
-	struct device_list *devl;
+	struct device_list *devl, *devl2;
 	struct lvmcache_vginfo *vginfo;
-	struct lvmcache_info *info, *info2;
+	struct lvmcache_info *info;

 	if (lvmetad_used())
 		return 1;
@ -1310,8 +1339,9 @@ int lvmcache_label_rescan_vg(struct cmd_context *cmd, const char *vgname, const
 		dm_list_add(&devs, &devl->list);
 	}

-	dm_list_iterate_items_safe(info, info2, &vginfo->infos)
-		lvmcache_del(info);
+	/* Delete info for each dev, deleting the last info will delete vginfo. */
+	dm_list_iterate_items(devl, &devs)
+		lvmcache_del_dev(devl->dev);

 	/* Dropping the last info struct is supposed to drop vginfo. */
 	if ((vginfo = lvmcache_vginfo_from_vgname(vgname, vgid)))
@ -1322,6 +1352,11 @@ int lvmcache_label_rescan_vg(struct cmd_context *cmd, const char *vgname, const

 	label_scan_devs(cmd, cmd->filter, &devs);

+	dm_list_iterate_items_safe(devl, devl2, &devs) {
+		dm_list_del(&devl->list);
+		dm_free(devl);
+	}
+
 	if (!(vginfo = lvmcache_vginfo_from_vgname(vgname, vgid))) {
 		log_warn("VG info not found after rescan of %s", vgname);
 		return 0;
@ -1439,10 +1474,17 @@ int lvmcache_label_scan(struct cmd_context *cmd)

 		dm_list_iterate_items(devl, &add_cache_devs) {
 			log_debug_cache("Rescan preferred device %s for lvmcache", dev_name(devl->dev));
-			label_read(devl->dev, NULL, UINT64_C(0));
+			label_read(devl->dev);
 		}

 		dm_list_splice(&_unused_duplicate_devs, &del_cache_devs);
+
+		/*
+		 * We might want to delay the duplicate device warnings until
+		 * after this filtering so that we can skip warning about
+		 * duplicates that we are filtering out.
+		 */
+		_filter_duplicate_devs(cmd);
 	}

 	/* Perform any format-specific scanning e.g. text files */
@ -1630,7 +1672,7 @@ const char *lvmcache_pvid_from_devname(struct cmd_context *cmd,
 		return NULL;
 	}

-	if (!(label_read(dev, NULL, UINT64_C(0))))
+	if (!label_read(dev))
 		return NULL;

 	return dev->pvid;
@ -1714,6 +1756,7 @@ void lvmcache_del(struct lvmcache_info *info)

 	info->label->labeller->ops->destroy_label(info->label->labeller,
 						  info->label);
+	label_destroy(info->label);
 	dm_free(info);
 }

@ -2457,15 +2500,6 @@ void lvmcache_destroy(struct cmd_context *cmd, int retain_orphans, int reset)
 	}
 }

-int lvmcache_pvid_is_locked(const char *pvid) {
-	struct lvmcache_info *info;
-	info = lvmcache_info_from_pvid(pvid, NULL, 0);
-	if (!info || !info->vginfo)
-		return 0;
-
-	return lvmcache_vgname_is_locked(info->vginfo->vgname);
-}
-
 int lvmcache_fid_add_mdas(struct lvmcache_info *info, struct format_instance *fid,
 			  const char *id, int id_len)
 {
--- a/lib/cache/lvmcache.h
+++ b/lib/cache/lvmcache.h
@ -132,7 +132,6 @@ struct dm_list *lvmcache_get_pvids(struct cmd_context *cmd, const char *vgname,
 void lvmcache_drop_metadata(const char *vgname, int drop_precommitted);
 void lvmcache_commit_metadata(const char *vgname);

-int lvmcache_pvid_is_locked(const char *pvid);
 int lvmcache_fid_add_mdas(struct lvmcache_info *info, struct format_instance *fid,
 			  const char *id, int id_len);
 int lvmcache_fid_add_mdas_pv(struct lvmcache_info *info, struct format_instance *fid);
--- a/lib/config/config_settings.h
+++ b/lib/config/config_settings.h
@ -768,7 +768,7 @@ cfg(global_activation_CFG, "activation", global_CFG_SECTION, 0, CFG_TYPE_BOOL, D
 	"the error messages.\n")

 cfg(global_fallback_to_lvm1_CFG, "fallback_to_lvm1", global_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_BOOL, 0, vsn(1, 0, 18), NULL, 0, NULL,
-	"This setting setting no longer used.\n")
+	"This setting is no longer used.\n")

 cfg(global_format_CFG, "format", global_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_STRING, DEFAULT_FORMAT, vsn(1, 0, 0), NULL, 0, NULL,
 	"This setting is no longer used.\n")
--- a/lib/device/bcache.c
+++ b/lib/device/bcache.c
@ -15,7 +15,7 @@
 #define _GNU_SOURCE

 #include "lib/device/bcache.h"
-#include "device_mapper/misc/dm-logging.h"
+#include "lib/log/lvm-logging.h"
 #include "lib/log/log.h"

 #include <errno.h>
@ -667,6 +667,7 @@ static void _issue_low_level(struct block *b, enum dir d)

 	b->io_dir = d;
 	_set_flags(b, BF_IO_PENDING);
+	cache->nr_io_pending++;

 	dm_list_move(&cache->io_pending, &b->list);

--- a/lib/device/dev-cache.c
+++ b/lib/device/dev-cache.c
@ -73,7 +73,6 @@ static void _dev_init(struct device *dev, int max_error_count)
 	dev->ext.src = DEV_EXT_NONE;

 	dm_list_init(&dev->aliases);
-	dm_list_init(&dev->open_list);
 }

 void dev_destroy_file(struct device *dev)
--- a/lib/device/dev-io.c
+++ b/lib/device/dev-io.c
@ -16,7 +16,6 @@
 #include "lib/misc/lib.h"
 #include "lib/device/device.h"
 #include "lib/metadata/metadata.h"
-#include "lib/cache/lvmcache.h"
 #include "lib/mm/memlock.h"
 #include "lib/locking/locking.h"

@ -53,7 +52,6 @@
 #  endif
 #endif

-static DM_LIST_INIT(_open_devices);
 static unsigned _dev_size_seqno = 1;

 static const char *_reasons[] = {
@ -199,7 +197,7 @@ int dev_get_block_size(struct device *dev, unsigned int *physical_block_size, un
 	*physical_block_size = (unsigned int) dev->phys_block_size;
 	*block_size = (unsigned int) dev->block_size;
 out:
-	if (needs_open && !dev_close(dev))
+	if (needs_open && !dev_close_immediate(dev))
 		stack;

 	return r;
@ -349,7 +347,7 @@ static int _dev_get_size_dev(struct device *dev, uint64_t *size)

 	if (ioctl(fd, BLKGETSIZE64, size) < 0) {
 		log_sys_error("ioctl BLKGETSIZE64", name);
-		if (do_close && !dev_close(dev))
+		if (do_close && !dev_close_immediate(dev))
 			log_sys_error("close", name);
 		return 0;
 	}
@ -360,7 +358,7 @@ static int _dev_get_size_dev(struct device *dev, uint64_t *size)

 	log_very_verbose("%s: size is %" PRIu64 " sectors", name, *size);

-	if (do_close && !dev_close(dev))
+	if (do_close && !dev_close_immediate(dev))
 		log_sys_error("close", name);

 	return 1;
@ -380,7 +378,7 @@ static int _dev_read_ahead_dev(struct device *dev, uint32_t *read_ahead)

 	if (ioctl(dev->fd, BLKRAGET, &read_ahead_long) < 0) {
 		log_sys_error("ioctl BLKRAGET", dev_name(dev));
-		if (!dev_close(dev))
+		if (!dev_close_immediate(dev))
 			stack;
 		return 0;
 	}
@ -391,7 +389,7 @@ static int _dev_read_ahead_dev(struct device *dev, uint32_t *read_ahead)
 	log_very_verbose("%s: read_ahead is %u sectors",
 			 dev_name(dev), *read_ahead);

-	if (!dev_close(dev))
+	if (!dev_close_immediate(dev))
 		stack;

 	return 1;
@ -412,13 +410,13 @@ static int _dev_discard_blocks(struct device *dev, uint64_t offset_bytes, uint64
 	if (ioctl(dev->fd, BLKDISCARD, &discard_range) < 0) {
 		log_error("%s: BLKDISCARD ioctl at offset %" PRIu64 " size %" PRIu64 " failed: %s.",
 			  dev_name(dev), offset_bytes, size_bytes, strerror(errno));
-		if (!dev_close(dev))
+		if (!dev_close_immediate(dev))
 			stack;
 		/* It doesn't matter if discard failed, so return success. */
 		return 1;
 	}

-	if (!dev_close(dev))
+	if (!dev_close_immediate(dev))
 		stack;

 	return 1;
@ -597,8 +595,6 @@ int dev_open_flags(struct device *dev, int flags, int direct, int quiet)
 	if ((flags & O_CREAT) && !(flags & O_TRUNC))
 		dev->end = lseek(dev->fd, (off_t) 0, SEEK_END);

-	dm_list_add(&_open_devices, &dev->open_list);
-
 	log_debug_devs("Opened %s %s%s%s", dev_name(dev),
 		       dev->flags & DEV_OPENED_RW ? "RW" : "RO",
 		       dev->flags & DEV_OPENED_EXCL ? " O_EXCL" : "",
@ -650,7 +646,6 @@ static void _close(struct device *dev)
 	dev->fd = -1;
 	dev->phys_block_size = -1;
 	dev->block_size = -1;
-	dm_list_del(&dev->open_list);

 	log_debug_devs("Closed %s", dev_name(dev));

@ -678,9 +673,7 @@ static int _dev_close(struct device *dev, int immediate)
 		log_debug_devs("%s: Immediate close attempt while still referenced",
 			       dev_name(dev));

-	/* Close unless device is known to belong to a locked VG */
-	if (immediate ||
-	    (dev->open_count < 1 && !lvmcache_pvid_is_locked(dev->pvid)))
+	if (immediate || (dev->open_count < 1))
 		_close(dev);

 	return 1;
@ -696,18 +689,6 @@ int dev_close_immediate(struct device *dev)
 	return _dev_close(dev, 1);
 }

-void dev_close_all(void)
-{
-	struct dm_list *doh, *doht;
-	struct device *dev;
-
-	dm_list_iterate_safe(doh, doht, &_open_devices) {
-		dev = dm_list_struct_base(doh, struct device, open_list);
-		if (dev->open_count < 1)
-			_close(dev);
-	}
-}
-
 static inline int _dev_is_valid(struct device *dev)
 {
 	return (dev->max_error_count == NO_DEV_ERROR_COUNT_LIMIT ||
--- a/lib/device/dev-md.c
+++ b/lib/device/dev-md.c
@ -134,8 +134,7 @@ static int _native_dev_is_md(struct device *dev, uint64_t *offset_found, int ful
 	 * Those checks can't be satisfied with the initial bcache data, and
 	 * would require an extra read i/o at the end of every device.  Issuing
 	 * an extra read to every device in every command, just to check for
-	 * the old md format is a bad tradeoff.  It's also not a big issue if
-	 * one happens to exist and we don't filter it out.
+	 * the old md format is a bad tradeoff.
 	 *
 	 * When "full" is set, we check a the start and end of the device for
 	 * md magic numbers.  When "full" is not set, we only check at the
@ -143,6 +142,13 @@ static int _native_dev_is_md(struct device *dev, uint64_t *offset_found, int ful
 	 * command if it should do a full check (cmd->use_full_md_check),
 	 * and set it for commands that could possibly write to an md dev
 	 * (pvcreate/vgcreate/vgextend).
+	 *
+	 * For old md versions with magic numbers at the end of devices,
+	 * the md dev components won't be filtered out here when full is 0,
+	 * so they will be scanned, and appear as duplicate PVs in lvmcache.
+	 * The md device itself will be chosen as the primary duplicate,
+	 * and the components are dropped from the list of duplicates in lvmcache,
+	 * i.e. a kind of post-scan filtering.
 	 */
 	if (!full) {
 		sb_offset = 0;
--- a/lib/device/dev-type.c
+++ b/lib/device/dev-type.c
@ -215,6 +215,9 @@ int dev_subsystem_part_major(struct dev_types *dt, struct device *dev)
 	if (MAJOR(dev->dev) == dt->device_mapper_major)
 		return 1;

+	if (MAJOR(dev->dev) == dt->md_major)
+		return 1;
+
 	if (MAJOR(dev->dev) == dt->drbd_major)
 		return 1;

--- a/lib/device/device.h
+++ b/lib/device/device.h
@ -74,7 +74,6 @@ struct device {
 	unsigned size_seqno;
 	uint64_t size;
 	uint64_t end;
-	struct dm_list open_list;
 	struct dev_ext ext;

 	const char *vgid; /* if device is an LV */
@ -144,7 +143,6 @@ int dev_open_readonly_buffered(struct device *dev);
 int dev_open_readonly_quiet(struct device *dev);
 int dev_close(struct device *dev);
 int dev_close_immediate(struct device *dev);
-void dev_close_all(void);
 int dev_test_excl(struct device *dev);

 int dev_fd(struct device *dev);
--- a/lib/label/label.c
+++ b/lib/label/label.c
@ -251,9 +251,11 @@ static bool _in_bcache(struct device *dev)

 static struct labeller *_find_lvm_header(struct device *dev,
 				   char *scan_buf,
+				   uint32_t scan_buf_sectors,
 				   char *label_buf,
 				   uint64_t *label_sector,
-				   uint64_t scan_sector)
+				   uint64_t block_sector,
+				   uint64_t start_sector)
 {
 	struct labeller_i *li;
 	struct labeller *labeller_ret = NULL;
@ -266,25 +268,34 @@ static struct labeller *_find_lvm_header(struct device *dev,
 	 * and copy it into label_buf.
 	 */

-	for (sector = 0; sector < LABEL_SCAN_SECTORS;
+	for (sector = start_sector; sector < start_sector + LABEL_SCAN_SECTORS;
 	     sector += LABEL_SIZE >> SECTOR_SHIFT) {
+
+		/*
+		 * The scan_buf passed in is a bcache block, which is
+		 * BCACHE_BLOCK_SIZE_IN_SECTORS large.  So if start_sector is
+		 * one of the last couple sectors in that buffer, we need to
+		 * break early.
+		 */
+		if (sector >= scan_buf_sectors)
+			break;
+
 		lh = (struct label_header *) (scan_buf + (sector << SECTOR_SHIFT));

 		if (!strncmp((char *)lh->id, LABEL_ID, sizeof(lh->id))) {
 			if (found) {
 				log_error("Ignoring additional label on %s at sector %llu",
-					  dev_name(dev), (unsigned long long)(sector + scan_sector));
+					  dev_name(dev), (unsigned long long)(block_sector + sector));
 			}
-			if (xlate64(lh->sector_xl) != sector + scan_sector) {
-				log_very_verbose("%s: Label for sector %llu found at sector %llu - ignoring.",
-						 dev_name(dev),
-						 (unsigned long long)xlate64(lh->sector_xl),
-						 (unsigned long long)(sector + scan_sector));
+			if (xlate64(lh->sector_xl) != sector) {
+				log_warn("%s: Label for sector %llu found at sector %llu - ignoring.",
+					 dev_name(dev),
+					 (unsigned long long)xlate64(lh->sector_xl),
+					 (unsigned long long)(block_sector + sector));
 				continue;
 			}
-			if (calc_crc(INITIAL_CRC, (uint8_t *)&lh->offset_xl, LABEL_SIZE -
-				     ((uint8_t *) &lh->offset_xl - (uint8_t *) lh)) !=
-			    xlate32(lh->crc_xl)) {
+			if (calc_crc(INITIAL_CRC, (uint8_t *)&lh->offset_xl,
+				     LABEL_SIZE - ((uint8_t *) &lh->offset_xl - (uint8_t *) lh)) != xlate32(lh->crc_xl)) {
 				log_very_verbose("Label checksum incorrect on %s - ignoring", dev_name(dev));
 				continue;
 			}
@ -293,14 +304,14 @@ static struct labeller *_find_lvm_header(struct device *dev,
 		}

 		dm_list_iterate_items(li, &_labellers) {
-			if (li->l->ops->can_handle(li->l, (char *) lh, sector + scan_sector)) {
+			if (li->l->ops->can_handle(li->l, (char *) lh, block_sector + sector)) {
 				log_very_verbose("%s: %s label detected at sector %llu", 
 						 dev_name(dev), li->name,
-						 (unsigned long long)(sector + scan_sector));
+						 (unsigned long long)(block_sector + sector));
 				if (found) {
 					log_error("Ignoring additional label on %s at sector %llu",
 						  dev_name(dev),
-						  (unsigned long long)(sector + scan_sector));
+						  (unsigned long long)(block_sector + sector));
 					continue;
 				}

@ -309,7 +320,7 @@ static struct labeller *_find_lvm_header(struct device *dev,

 				memcpy(label_buf, lh, LABEL_SIZE);
 				if (label_sector)
-					*label_sector = sector + scan_sector;
+					*label_sector = block_sector + sector;
 				break;
 			}
 		}
@ -329,7 +340,9 @@ static struct labeller *_find_lvm_header(struct device *dev,
 * are performed in the processing functions to get that data.
 */
 static int _process_block(struct cmd_context *cmd, struct dev_filter *f,
-			  struct device *dev, struct block *bb, int *is_lvm_device)
+			  struct device *dev, struct block *bb,
+			  uint64_t block_sector, uint64_t start_sector,
+			  int *is_lvm_device)
 {
 	char label_buf[LABEL_SIZE] __attribute__((aligned(8)));
 	struct label *label = NULL;
@ -345,7 +358,7 @@ static int _process_block(struct cmd_context *cmd, struct dev_filter *f,
 	 * data had been read (here).  They set this flag to indicate that the
 	 * filters should be retested now that data from the device is ready.
 	 */
-	if (cmd && (dev->flags & DEV_FILTER_AFTER_SCAN)) {
+	if (f && (dev->flags & DEV_FILTER_AFTER_SCAN)) {
 		dev->flags &= ~DEV_FILTER_AFTER_SCAN;

 		log_debug_devs("Scan filtering %s", dev_name(dev));
@ -374,7 +387,7 @@ static int _process_block(struct cmd_context *cmd, struct dev_filter *f,
 	 * FIXME: we don't need to copy one sector from bb->data into label_buf,
 	 * we can just point label_buf at one sector in ld->buf.
 	 */
-	if (!(labeller = _find_lvm_header(dev, bb->data, label_buf, &sector, 0))) {
+	if (!(labeller = _find_lvm_header(dev, bb->data, BCACHE_BLOCK_SIZE_IN_SECTORS, label_buf, &sector, block_sector, start_sector))) {

 		/*
 		 * Non-PVs exit here
@ -567,7 +580,7 @@ static int _scan_list(struct cmd_context *cmd, struct dev_filter *f,
 		} else {
 			log_debug_devs("Processing data from device %s fd %d block %p", dev_name(devl->dev), devl->dev->bcache_fd, bb);

-			ret = _process_block(cmd, f, devl->dev, bb, &is_lvm_device);
+			ret = _process_block(cmd, f, devl->dev, bb, 0, 0, &is_lvm_device);

 			if (!ret && is_lvm_device) {
 				log_debug_devs("Scan failed to process %s", dev_name(devl->dev));
@ -610,32 +623,46 @@ static int _scan_list(struct cmd_context *cmd, struct dev_filter *f,
 	return 1;
 }

+/*
+ * How many blocks to set up in bcache?  Is 1024 a good max?
+ *
+ * Currently, we tell bcache to set up N blocks where N
+ * is the number of devices that are going to be scanned.
+ * Reasons why this number may not be a good choice:
+ *
+ * - there may be a lot of non-lvm devices, which
+ *   would make this number larger than necessary
+ *
+ * - each lvm device may use more than one cache
+ *   block if the metadata is large enough or it
+ *   uses more than one metadata area, which
+ *   would make this number smaller than it
+ *   should be for the best performance.
+ *
+ * This is even more tricky to estimate when lvmetad
+ * is used, because it's hard to predict how many
+ * devs might need to be scanned when using lvmetad.
+ * This currently just sets up bcache with MIN blocks.
+ */
+
 #define MIN_BCACHE_BLOCKS 32
+#define MAX_BCACHE_BLOCKS 1024

 static int _setup_bcache(int cache_blocks)
 {
 	struct io_engine *ioe;

-	/* No devices can happen, just create bcache with any small number. */
 	if (cache_blocks < MIN_BCACHE_BLOCKS)
 		cache_blocks = MIN_BCACHE_BLOCKS;

-	/*
-	 * 100 is arbitrary, it's the max number of concurrent aio's
-	 * possible, i.e, the number of devices that can be read at
-	 * once.  Should this be configurable?
-	 */
+	if (cache_blocks > MAX_BCACHE_BLOCKS)
+		cache_blocks = MAX_BCACHE_BLOCKS;
+
 	if (!(ioe = create_async_io_engine())) {
 		log_error("Failed to create bcache io engine.");
 		return 0;
 	}

-	/*
-	 * Configure one cache block for each device on the system.
-	 * We won't generally need to cache that many because some
-	 * of the devs will not be lvm devices, and we don't need
-	 * an entry for those.  We might want to change this.
-	 */
 	if (!(scan_bcache = bcache_create(BCACHE_BLOCK_SIZE_IN_SECTORS, cache_blocks, ioe))) {
 		log_error("Failed to create bcache with %d cache blocks.", cache_blocks);
 		return 0;
@ -653,7 +680,7 @@ int label_scan(struct cmd_context *cmd)
 {
 	struct dm_list all_devs;
 	struct dev_iter *iter;
-	struct device_list *devl;
+	struct device_list *devl, *devl2;
 	struct device *dev;

 	log_debug_devs("Finding devices to scan");
@ -695,16 +722,17 @@ int label_scan(struct cmd_context *cmd)
 	log_debug_devs("Found %d devices to scan", dm_list_size(&all_devs));

 	if (!scan_bcache) {
-		/*
-		 * FIXME: there should probably be some max number of
-		 * cache blocks we use when setting up bcache.
-		 */
 		if (!_setup_bcache(dm_list_size(&all_devs)))
 			return 0;
 	}

 	_scan_list(cmd, cmd->full_filter, &all_devs, NULL);

+	dm_list_iterate_items_safe(devl, devl2, &all_devs) {
+		dm_list_del(&devl->list);
+		dm_free(devl);
+	}
+
 	return 1;
 }

@ -834,9 +862,7 @@ void label_scan_destroy(struct cmd_context *cmd)
 * device, this is not a commonly used function.
 */

-/* FIXME: remove unused_sector arg */
-
-int label_read(struct device *dev, struct label **labelp, uint64_t unused_sector)
+int label_read(struct device *dev)
 {
 	struct dm_list one_dev;
 	struct device_list *devl;
@ -856,17 +882,7 @@ int label_read(struct device *dev, struct label **labelp, uint64_t unused_sector

 	_scan_list(NULL, NULL, &one_dev, &failed);

-	/*
-	 * FIXME: this ugliness of returning a pointer to the label is
-	 * temporary until the callers can be updated to not use this.
-	 */
-	if (labelp) {
-		struct lvmcache_info *info;
-
-		info = lvmcache_info_from_pvid(dev->pvid, dev, 1);
-		if (info)
-			*labelp = lvmcache_get_label(info);
-	}
+	dm_free(devl);

 	if (failed)
 		return 0;
@ -875,25 +891,66 @@ int label_read(struct device *dev, struct label **labelp, uint64_t unused_sector

 /*
 * Read a label from a specfic, non-zero sector.  This is used in only
- * one place: pvck -> pv_analyze.
+ * one place: pvck/pv_analyze.
 */

-int label_read_sector(struct device *dev, struct label **labelp, uint64_t scan_sector)
+/*
+ * Read the bcache block containing read_sector and hand it to
+ * _process_block() together with the first sector of that block
+ * (block_sector) and the offset of read_sector inside it (start_sector).
+ *
+ * Returns 1 only if the block was processed successfully and the device
+ * was identified as an lvm device.  Returns 0 if bcache is not set up,
+ * the device cannot be opened, the read fails, processing fails, or no
+ * label is found.
+ */
+int label_read_sector(struct device *dev, uint64_t read_sector)
 {
-	if (scan_sector) {
-		/* TODO: not yet implemented */
-		/* When is this done?  When does it make sense?  Is it actually possible? */
-		return 0;
+	struct block *bb = NULL;
+	uint64_t block_num;
+	uint64_t block_sector;
+	uint64_t start_sector;
+	int is_lvm_device = 0;
+	int result;
+	int ret;
+
+	if (!scan_bcache) {
+		/* Should not happen */
+		log_error("label_read_sector bcache not set up %s", dev_name(dev));
+		return 0;
+	}
+
+	block_num = read_sector / BCACHE_BLOCK_SIZE_IN_SECTORS;
+	block_sector = block_num * BCACHE_BLOCK_SIZE_IN_SECTORS;
+	start_sector = read_sector % BCACHE_BLOCK_SIZE_IN_SECTORS;
+
+	/*
+	 * NOTE(review): assumes label_scan_open() returns 0 on failure like
+	 * the other int-returning functions in this file - confirm.
+	 */
+	if (!label_scan_open(dev)) {
+		log_error("Scan failed to open %s", dev_name(dev));
+		return 0;
+	}
+
+	bcache_prefetch(scan_bcache, dev->bcache_fd, block_num);
+
+	if (!bcache_get(scan_bcache, dev->bcache_fd, block_num, 0, &bb)) {
+		log_error("Scan failed to read %s at %llu",
+			  dev_name(dev), (unsigned long long)block_num);
+		ret = 0;
+		goto out;
 	}

-	return label_read(dev, labelp, 0);
+	/*
+	 * TODO: check if read_sector is larger than the bcache block size.
+	 * If it is, we need to fetch a later block from bcache.
+	 * NOTE(review): block_num above is already derived from read_sector,
+	 * so this TODO may be stale - confirm.
+	 */
+
+	result = _process_block(NULL, NULL, dev, bb, block_sector, start_sector, &is_lvm_device);
+
+	/* Processing failed on a device that does carry an lvm label. */
+	if (!result && is_lvm_device) {
+		log_error("Scan failed to process %s", dev_name(dev));
+		ret = 0;
+		goto out;
+	}
+
+	/* Any other non-success outcome is reported as a missing label. */
+	if (!result || !is_lvm_device) {
+		log_error("Could not find LVM label on %s", dev_name(dev));
+		ret = 0;
+		goto out;
+	}
+
+	ret = 1;
+out:
+	/* bb is only set once bcache_get() has succeeded. */
+	if (bb)
+		bcache_put(bb);
+	return ret;
 }

 /*
 * This is only needed when commands are using lvmetad, in which case they
 * don't do an initial label_scan, but may later need to rescan certain devs
 * from disk and call this function.  FIXME: is there some better number to
- * choose here?
+ * choose here?  How should we predict the number of devices that might need
+ * scanning when using lvmetad?
 */

 int label_scan_setup_bcache(void)
@ -922,18 +979,10 @@ int label_scan_open(struct device *dev)

 bool dev_read_bytes(struct device *dev, uint64_t start, size_t len, void *data)
 {
-	int ret;
-
 	if (!scan_bcache) {
-		if (!dev_open_readonly(dev))
-			return false;
-
-		ret = dev_read(dev, start, len, 0, data);
-
-		if (!dev_close(dev))
-			stack;
-
-		return ret ? true : false;
+		/* Should not happen */
+		log_error("dev_read bcache not set up %s", dev_name(dev));
+		return false;
 	}

 	if (dev->bcache_fd <= 0) {
@ -956,21 +1005,13 @@ bool dev_read_bytes(struct device *dev, uint64_t start, size_t len, void *data)

 bool dev_write_bytes(struct device *dev, uint64_t start, size_t len, void *data)
 {
-	int ret;
-
 	if (test_mode())
 		return true;

 	if (!scan_bcache) {
-		if (!dev_open(dev))
-			return false;
-
-		ret = dev_write(dev, start, len, 0, data);
-
-		if (!dev_close(dev))
-			stack;
-
-		return ret ? true : false;
+		/* Should not happen */
+		log_error("dev_write bcache not set up %s", dev_name(dev));
+		return false;
 	}

 	if (dev->bcache_fd <= 0) {
@ -1003,7 +1044,7 @@ bool dev_write_zeros(struct device *dev, uint64_t start, size_t len)
 		return true;

 	if (!scan_bcache) {
-		log_error("dev_write_zeros %s bcache not set up", dev_name(dev));
+		log_error("dev_write_zeros bcache not set up %s", dev_name(dev));
 		return false;
 	}

@ -1037,7 +1078,7 @@ bool dev_set_bytes(struct device *dev, uint64_t start, size_t len, uint8_t val)
 		return true;

 	if (!scan_bcache) {
-		log_error("dev_set_bytes %s bcache not set up", dev_name(dev));
+		log_error("dev_set_bytes bcache not set up %s", dev_name(dev));
 		return false;
 	}

--- a/lib/label/label.h
+++ b/lib/label/label.h
@ -109,8 +109,8 @@ void label_scan_invalidate(struct device *dev);
 void label_scan_invalidate_lv(struct cmd_context *cmd, struct logical_volume *lv);
 void label_scan_drop(struct cmd_context *cmd);
 void label_scan_destroy(struct cmd_context *cmd);
-int label_read(struct device *dev, struct label **labelp, uint64_t unused_sector);
-int label_read_sector(struct device *dev, struct label **labelp, uint64_t scan_sector);
+int label_read(struct device *dev);
+int label_read_sector(struct device *dev, uint64_t read_sector);
 void label_scan_confirm(struct device *dev);
 int label_scan_setup_bcache(void);
 int label_scan_open(struct device *dev);
--- a/lib/metadata/metadata-liblvm.c
+++ b/lib/metadata/metadata-liblvm.c
@ -483,7 +483,6 @@ static int _pvremove_check(struct cmd_context *cmd, const char *name,
 {
 	static const char really_wipe_msg[] = "Really WIPE LABELS from physical volume";
 	struct device *dev;
-	struct label *label;
 	struct pv_list *pvl;
 	struct physical_volume *pv = NULL;
 	int used;
@ -498,7 +497,7 @@ static int _pvremove_check(struct cmd_context *cmd, const char *name,

 	/* Is there a pv here already? */
 	/* If not, this is an error unless you used -f. */
-	if (!label_read(dev, &label, 0)) {
+	if (!label_read(dev)) {
 		if (force_count)
 			return 1;
 		log_error("No PV label found on %s.", name);
--- a/lib/metadata/metadata.c
+++ b/lib/metadata/metadata.c
@ -4058,7 +4058,7 @@ static struct volume_group *_vg_read(struct cmd_context *cmd,
 						release_vg(correct_vg);
 						correct_vg = NULL;
 						lvmcache_del(info);
-						label_read(pvl->pv->dev, NULL, 0);
+						label_read(pvl->pv->dev);
 						goto restart_scan;
 					}
 #endif
@ -5192,8 +5192,6 @@ static struct volume_group *_recover_vg(struct cmd_context *cmd,

 	unlock_vg(cmd, NULL, vg_name);

-	dev_close_all();
-
 	if (!lock_vol(cmd, vg_name, LCK_VG_WRITE, NULL))
 		return_NULL;

--- a/lib/misc/lvm-exec.c
+++ b/lib/misc/lvm-exec.c
@ -78,7 +78,6 @@ int exec_cmd(struct cmd_context *cmd, const char *const argv[],
 	if (!pid) {
 		/* Child */
 		reset_locking();
-		dev_close_all();
 		/* FIXME Fix effect of reset_locking on cache then include this */
 		/* destroy_toolcontext(cmd); */
 		/* FIXME Use execve directly */
--- a/liblvm/lvm_vg.c
+++ b/liblvm/lvm_vg.c
@ -186,6 +186,8 @@ int lvm_vg_close(vg_t vg)
 	struct saved_env e = store_user_env(vg->cmd);
 	if (vg_read_error(vg) == FAILED_LOCKING)
 		release_vg(vg);
+	else if (!lvmcache_vgname_is_locked(vg->name))
+		release_vg(vg);
 	else
 		unlock_and_release_vg(vg->cmd, vg, vg->name);
 	restore_user_env(&e);
--- a/man/lvconvert.8_pregen
+++ b/man/lvconvert.8_pregen
@ -503,7 +503,7 @@ Merge LV images that were split from a raid1 LV.
 Convert LV to a thin LV, using the original LV as an external origin.
 .br
 .P
-\fBlvconvert\fP \fB--type\fP \fBthin\fP \fB--thinpool\fP \fILV\fP \fILV\fP\fI_linear_striped_cache_raid\fP
+\fBlvconvert\fP \fB--type\fP \fBthin\fP \fB--thinpool\fP \fILV\fP \fILV\fP\fI_linear_striped_thin_cache_raid\fP
 .br
 .RS 4
 .ad l
@ -1530,7 +1530,7 @@ Convert LV to a thin LV, using the original LV as an external origin
 (infers --type thin).
 .br
 .P
-\fBlvconvert\fP \fB-T\fP|\fB--thin\fP \fB--thinpool\fP \fILV\fP \fILV\fP\fI_linear_striped_cache_raid\fP
+\fBlvconvert\fP \fB-T\fP|\fB--thin\fP \fB--thinpool\fP \fILV\fP \fILV\fP\fI_linear_striped_thin_cache_raid\fP
 .br
 .RS 4
 .ad l
@ -1714,7 +1714,9 @@ origin LV (first arg) to reverse a splitsnapshot command.
 .br
 -

-Poll LV to continue conversion (also see --startpoll).
+Poll LV to continue conversion (also see --startpoll)
+.br
+or wait until conversion/mirror syncing is finished.
 .br
 .P
 \fBlvconvert\fP \fILV\fP\fI_mirror_raid\fP
--- a/man/pvcreate.8_pregen
+++ b/man/pvcreate.8_pregen
@ -38,7 +38,7 @@ normally prevent it, e.g. if the PV is already in a VG.
 .ad b
 .br
 .ad l
-[ \fB-M\fP|\fB--metadatatype\fP \fBlvm2\fP|\fBlvm1\fP ]
+[ \fB-M\fP|\fB--metadatatype\fP \fBlvm2\fP ]
 .ad b
 .br
 .ad l
@ -266,12 +266,11 @@ The size may be rounded.
 .ad b
 .HP
 .ad l
-\fB-M\fP|\fB--metadatatype\fP \fBlvm2\fP|\fBlvm1\fP
+\fB-M\fP|\fB--metadatatype\fP \fBlvm2\fP
 .br
 Specifies the type of on-disk metadata to use.
 \fBlvm2\fP (or just \fB2\fP) is the current, standard format.
-\fBlvm1\fP (or just \fB1\fP) is a historical format that
-can be used for accessing old data.
+\fBlvm1\fP (or just \fB1\fP) is no longer used.
 .ad b
 .HP
 .ad l
--- a/man/vgcfgrestore.8_pregen
+++ b/man/vgcfgrestore.8_pregen
@ -51,7 +51,7 @@ vgcfgrestore - Restore volume group configuration
 .ad b
 .br
 .ad l
- \fB-M\fP|\fB--metadatatype\fP \fBlvm2\fP|\fBlvm1\fP
+ \fB-M\fP|\fB--metadatatype\fP \fBlvm2\fP
 .ad b
 .br
 .ad l
@ -141,7 +141,7 @@ Common options for command:
 .
 .RS 4
 .ad l
-[ \fB-M\fP|\fB--metadatatype\fP \fBlvm2\fP|\fBlvm1\fP ]
+[ \fB-M\fP|\fB--metadatatype\fP \fBlvm2\fP ]
 .ad b
 .br
 .ad l
@ -280,12 +280,11 @@ Display long help text.
 .ad b
 .HP
 .ad l
-\fB-M\fP|\fB--metadatatype\fP \fBlvm2\fP|\fBlvm1\fP
+\fB-M\fP|\fB--metadatatype\fP \fBlvm2\fP
 .br
 Specifies the type of on-disk metadata to use.
 \fBlvm2\fP (or just \fB2\fP) is the current, standard format.
-\fBlvm1\fP (or just \fB1\fP) is a historical format that
-can be used for accessing old data.
+\fBlvm1\fP (or just \fB1\fP) is no longer used.
 .ad b
 .HP
 .ad l
--- a/man/vgconvert.8_pregen
+++ b/man/vgconvert.8_pregen
@ -8,12 +8,9 @@ vgconvert - Change volume group metadata format
    [ \fIoption_args\fP ]
 .br
 .SH DESCRIPTION
-vgconvert converts VG metadata from one format to another.  The new
-metadata format must be able to fit into the space provided by the old
+vgconvert converts VG metadata from one format to another.  This command
+is no longer used because this version of lvm no longer supports the LVM1
 format.
-
-Because the LVM1 format should no longer be used, this command is no
-longer needed in general.
 .SH USAGE
 \fBvgconvert\fP \fIVG\fP ...
 .br
@ -23,7 +20,7 @@ longer needed in general.
 .ad b
 .br
 .ad l
-[ \fB-M\fP|\fB--metadatatype\fP \fBlvm2\fP|\fBlvm1\fP ]
+[ \fB-M\fP|\fB--metadatatype\fP \fBlvm2\fP ]
 .ad b
 .br
 .ad l
@ -194,12 +191,11 @@ The size may be rounded.
 .ad b
 .HP
 .ad l
-\fB-M\fP|\fB--metadatatype\fP \fBlvm2\fP|\fBlvm1\fP
+\fB-M\fP|\fB--metadatatype\fP \fBlvm2\fP
 .br
 Specifies the type of on-disk metadata to use.
 \fBlvm2\fP (or just \fB2\fP) is the current, standard format.
-\fBlvm1\fP (or just \fB1\fP) is a historical format that
-can be used for accessing old data.
+\fBlvm1\fP (or just \fB1\fP) is no longer used.
 .ad b
 .HP
 .ad l
--- a/man/vgcreate.8_pregen
+++ b/man/vgcreate.8_pregen
@ -33,7 +33,7 @@ devices are also available with vgcreate.
 .ad b
 .br
 .ad l
-[ \fB-M\fP|\fB--metadatatype\fP \fBlvm2\fP|\fBlvm1\fP ]
+[ \fB-M\fP|\fB--metadatatype\fP \fBlvm2\fP ]
 .ad b
 .br
 .ad l
@ -324,12 +324,11 @@ The size may be rounded.
 .ad b
 .HP
 .ad l
-\fB-M\fP|\fB--metadatatype\fP \fBlvm2\fP|\fBlvm1\fP
+\fB-M\fP|\fB--metadatatype\fP \fBlvm2\fP
 .br
 Specifies the type of on-disk metadata to use.
 \fBlvm2\fP (or just \fB2\fP) is the current, standard format.
-\fBlvm1\fP (or just \fB1\fP) is a historical format that
-can be used for accessing old data.
+\fBlvm1\fP (or just \fB1\fP) is no longer used.
 .ad b
 .HP
 .ad l
--- a/man/vgextend.8_pregen
+++ b/man/vgextend.8_pregen
@ -36,7 +36,7 @@ will initialize them. In this case pvcreate options can be used, e.g.
 .ad b
 .br
 .ad l
-[ \fB-M\fP|\fB--metadatatype\fP \fBlvm2\fP|\fBlvm1\fP ]
+[ \fB-M\fP|\fB--metadatatype\fP \fBlvm2\fP ]
 .ad b
 .br
 .ad l
@ -237,12 +237,11 @@ The size may be rounded.
 .ad b
 .HP
 .ad l
-\fB-M\fP|\fB--metadatatype\fP \fBlvm2\fP|\fBlvm1\fP
+\fB-M\fP|\fB--metadatatype\fP \fBlvm2\fP
 .br
 Specifies the type of on-disk metadata to use.
 \fBlvm2\fP (or just \fB2\fP) is the current, standard format.
-\fBlvm1\fP (or just \fB1\fP) is a historical format that
-can be used for accessing old data.
+\fBlvm1\fP (or just \fB1\fP) is no longer used.
 .ad b
 .HP
 .ad l
--- a/man/vgsplit.8_pregen
+++ b/man/vgsplit.8_pregen
@ -62,7 +62,7 @@ Common options for command:
 .ad b
 .br
 .ad l
-[ \fB-M\fP|\fB--metadatatype\fP \fBlvm2\fP|\fBlvm1\fP ]
+[ \fB-M\fP|\fB--metadatatype\fP \fBlvm2\fP ]
 .ad b
 .br
 .ad l
@ -230,12 +230,11 @@ and --vgmetadatacopies for improving performance.
 .ad b
 .HP
 .ad l
-\fB-M\fP|\fB--metadatatype\fP \fBlvm2\fP|\fBlvm1\fP
+\fB-M\fP|\fB--metadatatype\fP \fBlvm2\fP
 .br
 Specifies the type of on-disk metadata to use.
 \fBlvm2\fP (or just \fB2\fP) is the current, standard format.
-\fBlvm1\fP (or just \fB1\fP) is a historical format that
-can be used for accessing old data.
+\fBlvm1\fP (or just \fB1\fP) is no longer used.
 .ad b
 .HP
 .ad l
--- a/python/setup.py.in
+++ b/python/setup.py.in
@ -21,7 +21,7 @@ from distutils.core import setup, Extension
 liblvm = Extension('lvm',
                    sources = ['liblvm_python.c'],
                    libraries= ['lvm2app', 'devmapper'],
-                    library_dirs= ['@top_builddir@/liblvm'],
+                    library_dirs= ['@top_builddir@/liblvm', '@top_builddir@/libdm'],
                    include_dirs= ['@top_builddir@/include'])

 setup (name='lvm',
--- a/test/Makefile.in
+++ b/test/Makefile.in
@ -27,7 +27,7 @@ datarootdir = @datarootdir@

 LVM_TEST_RESULTS ?= results

-SUBDIRS = api
+SUBDIRS = api unit
 SOURCES = lib/not.c lib/harness.c
 CXXSOURCES = lib/runner.cpp
 CXXFLAGS += $(EXTRA_EXEC_CFLAGS)
@ -37,9 +37,9 @@ include $(top_builddir)/make.tmpl
 T ?= .
 S ?= @ # never match anything by default
 VERBOSE ?= 0
-ALL := $(shell find -L $(srcdir) \( -path \*/shell/\*.sh -or -path \*/api/\*.sh \) | $(SORT))
+ALL := $(shell find -L $(srcdir) \( -path \*/shell/\*.sh -or -path \*/api/\*.sh -or -path \*/unit/\*.sh \) | $(SORT))
 comma = ,
-RUN := $(shell find -L $(srcdir) -regextype posix-egrep \( -path \*/shell/\*.sh -or -path \*/api/\*.sh \) -and -regex "$(srcdir)/.*($(subst $(comma),|,$(T))).*" -and -not -regex "$(srcdir)/.*($(subst $(comma),|,$(S))).*" | $(SORT))
+RUN := $(shell find -L $(srcdir) -regextype posix-egrep \( -path \*/shell/\*.sh -or -path \*/api/\*.sh -or -path \*/unit/\*.sh \) -and -regex "$(srcdir)/.*($(subst $(comma),|,$(T))).*" -and -not -regex "$(srcdir)/.*($(subst $(comma),|,$(S))).*" | $(SORT))
 RUN_BASE = $(subst $(srcdir)/,,$(RUN))

 ifeq ("@BUILD_LVMETAD@", "yes")
@ -83,6 +83,7 @@ help:
 	@echo "  check_lvmlockd_sanlock Run tests with lvmlockd and sanlock."
 	@echo "  check_lvmlockd_dlm     Run tests with lvmlockd and dlm."
 	@echo "  check_lvmlockd_test    Run tests with lvmlockd --test."
+	@echo "  run-unit-test          Run only unit tests (root not needed)."
 	@echo "  clean			Clean dir."
 	@echo "  help			Display callable targets."
 	@echo -e "\nSupported variables:"
@ -90,6 +91,7 @@ help:
 	@echo "  LVM_TEST_BACKING_DEVICE Set device used for testing (see also LVM_TEST_DIR)."
 	@echo "  LVM_TEST_CAN_CLOBBER_DMESG Allow to clobber dmesg buffer without /dev/kmsg. (1)"
 	@echo "  LVM_TEST_DEVDIR	Set to '/dev' to run on real /dev."
+	@echo "  LVM_TEST_PREFER_BRD	Prefer using brd (ramdisk) over loop for testing [1]."
 	@echo "  LVM_TEST_DIR		Where to create test files  [$(LVM_TEST_DIR)]."
 	@echo "  LVM_TEST_LOCKING	Normal (1), Cluster (3)."
 	@echo "  LVM_TEST_LVMETAD	Start lvmetad (1)."
@ -189,6 +191,9 @@ check_lvmlockd_test: .tests-stamp
 		--flavours udev-lvmlockd-test --only $(T) --skip $(S)
 endif

+run-unit-test unit-test:
+	$(MAKE) -C unit $(@)
+
 DATADIR = $(datadir)/lvm2-testsuite
 EXECDIR = $(libexecdir)/lvm2-testsuite

@ -221,11 +226,13 @@ LIB_SHARED = check aux inittest utils get lvm-wrapper

 install: .tests-stamp lib/paths-installed
 	@echo $(srcdir)
-	$(INSTALL_DIR) $(DATADIR)/{shell,api,lib,dbus} $(EXECDIR)
+	$(INSTALL_DIR) $(DATADIR)/{shell,api,unit,lib,dbus} $(EXECDIR)
 	$(INSTALL_DATA) shell/*.sh $(DATADIR)/shell
 	$(INSTALL_DATA) api/*.sh $(DATADIR)/api
+	$(INSTALL_DATA) unit/*.sh $(DATADIR)/unit
 	$(INSTALL_DATA) lib/mke2fs.conf $(DATADIR)/lib
 	$(INSTALL_PROGRAM) api/*.{t,py} $(DATADIR)/api
+	$(INSTALL_PROGRAM) unit/unit-test $(DATADIR)/unit
 	$(INSTALL_PROGRAM) dbus/*.py $(DATADIR)/dbus/
 	$(INSTALL_DATA) lib/paths-installed $(DATADIR)/lib/paths
 	cd lib && $(INSTALL_DATA) \
--- a/test/api/pytest.sh
+++ b/test/api/pytest.sh
@ -31,7 +31,7 @@ aux prepare_dmeventd

 #Locate the python binding library to use.
 if [[ -n "${abs_top_builddir+varset}" ]]; then
-  python_lib=($(find "$abs_top_builddir" -name lvm.so))
+  python_lib=($(find "$abs_top_builddir" -name 'lvm*.so'))
  if [[ ${#python_lib[*]} -ne 1 ]]; then
    if [[ ${#python_lib[*]} -gt 1 ]]; then
      # Unable to test python bindings if multiple libraries found:
@ -58,6 +58,8 @@ aux prepare_pvs 6
 PY_UNIT_PVS=$(cat DEVICES)
 export PY_UNIT_PVS

+# NOTE(review): this runs a single test and then exits unconditionally, so
+# nothing after this point in the script is executed.  Looks like a
+# debugging leftover - confirm before merging.
+python_lvm_unit.py -v -f TestLvm.test_lv_persistence
+exit
 #python_lvm_unit.py -v -f

 # Run individual tests for shorter error trace
--- a/test/api/python_lvm_unit.py.in
+++ b/test/api/python_lvm_unit.py.in
@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!@PYTHON@

 # Copyright (C) 2012-2013 Red Hat, Inc. All rights reserved.
 #
--- a/test/lib/aux.sh
+++ b/test/lib/aux.sh
@ -477,6 +477,7 @@ teardown_devs() {

 	test ! -f MD_DEV || cleanup_md_dev
 	test ! -f DEVICES || teardown_devs_prefixed "$PREFIX"
+	test ! -f RAMDISK || { modprobe -r brd || true ; }

 	# NOTE: SCSI_DEBUG_DEV test must come before the LOOP test because
 	# prepare_scsi_debug_dev() also sets LOOP to short-circuit prepare_loop()
@ -489,7 +490,7 @@ teardown_devs() {
 	fi

 	not diff LOOP BACKING_DEV >/dev/null 2>&1 || rm -f BACKING_DEV
-	rm -f DEVICES LOOP
+	rm -f DEVICES LOOP RAMDISK

 	# Attempt to remove any loop devices that failed to get torn down if earlier tests aborted
 	test "${LVM_TEST_PARALLEL:-0}" -eq 1 || test -z "$COMMON_PREFIX" || {
@ -502,6 +503,7 @@ teardown_devs() {
 			udev_wait
 		}
 	}
+	restore_dm_mirror
 }

 kill_sleep_kill_() {
@ -631,14 +633,14 @@ teardown() {
 	test -n "$TESTDIR" && {
 		cd "$TESTOLDPWD" || die "Failed to enter $TESTOLDPWD"
 		# after this delete no further write is possible
-		rm -rf "$TESTDIR" || echo BLA
+		rm -rf "${TESTDIR:?}" || echo BLA
 	}

 	echo "ok"
 }

 prepare_loop() {
-	local size=${1=32}
+	local size=$1
 	shift # all other params are directly passed to all 'losetup' calls
 	local i
 	local slash
@ -691,6 +693,17 @@ prepare_loop() {
 	echo "ok ($LOOP)"
 }

+# Set up a brd ramdisk as the backing device.
+#   $1 - size; multiplied by 1024 and passed to the brd module as rd_size.
+# Any further arguments are ignored.
+# On success sets BACKING_DEV to /dev/ram0 and creates the marker file
+# RAMDISK in the current directory.  If modprobe fails, returns its status
+# without touching BACKING_DEV or creating RAMDISK.
+prepare_ramdisk() {
+	local size=$1
+
+	echo -n "## preparing ramdisk device..."
+	modprobe brd rd_size=$((size * 1024)) || return
+
+	BACKING_DEV=/dev/ram0
+	echo "ok ($BACKING_DEV)"
+	touch RAMDISK
+}
+
 # A drop-in replacement for prepare_loop() that uses scsi_debug to create
 # a ramdisk-based SCSI device upon which all LVM devices will be created
 # - scripts must take care not to use a DEV_SIZE that will enduce OOM-killer
@ -818,14 +831,33 @@ cleanup_md_dev() {
 }

 prepare_backing_dev() {
+	local size=${1=32}
+	shift
+
 	if test -f BACKING_DEV; then
 		BACKING_DEV=$(< BACKING_DEV)
+		return 0
 	elif test -b "$LVM_TEST_BACKING_DEVICE"; then
 		BACKING_DEV=$LVM_TEST_BACKING_DEVICE
 		echo "$BACKING_DEV" > BACKING_DEV
-	else
-		prepare_loop "$@"
+		return 0
+	elif test "${LVM_TEST_PREFER_BRD-1}" = "1" && \
+	     test ! -d /sys/block/ram0 && \
+	     kernel_at_least 4 16 && \
+	     test "$size" -lt 16384; then
+		# try to use ramdisk if possible, but for
+		# big allocs (>16G) do not try to use ramdisk
+		# Also we can't use BRD device prior kernel 4.16
+		# since they were DAX based and lvm2 often relies
+		# in save table loading between exiting backend device
+		# and  bio-based 'error' device.
+		# However with request based DAX brd device we get this:
+		# device-mapper: ioctl: can't change device type after initial table load.
+		prepare_ramdisk "$size" "$@" && return
+		echo "(failed)"
 	fi
+
+	prepare_loop "$size" "$@"
 }

 prepare_devs() {
@ -844,6 +876,7 @@ prepare_devs() {
 	prepare_backing_dev $(( n * devsize ))
 	# shift start of PV devices on /dev/loopXX by 1M
 	not diff LOOP BACKING_DEV >/dev/null 2>&1 || shift=2048
+	blkdiscard "$BACKING_DEV" 2>/dev/null || true
 	echo -n "## preparing $n devices..."

 	local size=$(( devsize * 2048 )) # sectors
@ -870,8 +903,7 @@ prepare_devs() {
 	fi

 	# non-ephemeral devices need to be cleared between tests
-	test -f LOOP || for d in "${DEVICES[@]}"; do
-		blkdiscard "$d" 2>/dev/null || true
+	test -f LOOP -o -f RAMDISK || for d in "${DEVICES[@]}"; do
 		# ensure disk header is always zeroed
 		dd if=/dev/zero of="$d" bs=32k count=1
 		wipefs -a "$d" 2>/dev/null || true
@ -1032,6 +1064,23 @@ enable_dev() {
 	done
 }

+# Throttle down performance of kcopyd when mirroring i.e. disk image
+#   $1 - value written to the dm_mirror raid1_resync_throttle parameter
+#        (default 1).
+# The current value is saved into the file THROTTLE the first time this is
+# called (i.e. only when THROTTLE does not exist yet).
+# Returns non-zero without changing anything when the sysfs parameter is
+# not present.
+throttle_sys="/sys/module/dm_mirror/parameters/raid1_resync_throttle"
+throttle_dm_mirror() {
+	test -e "$throttle_sys" || return
+	test -f THROTTLE || cat "$throttle_sys" > THROTTLE
+	echo ${1-1} > "$throttle_sys"
+}
+
+# Restore original kcopyd throttle value and have mirroring fast again
+# Does nothing unless the file THROTTLE exists; otherwise its content is
+# written back to $throttle_sys and the file is removed.
+restore_dm_mirror() {
+	test ! -f THROTTLE || {
+		cat THROTTLE > "$throttle_sys"
+		rm -f THROTTLE
+	}
+}
+
+
 # Once there is $name.devtable
 # this is a quick way to restore to this table entry
 restore_from_devtable() {
@ -1312,6 +1361,11 @@ apitest() {
 	"$TESTOLDPWD/api/$1.t" "${@:2}" && rm -f debug.log strace.log
 }

+# Run the unit-test binary from $TESTOLDPWD/unit with all given arguments.
+# Calls 'skip' when the binary is missing or not executable.
+unittest() {
+	test -x "$TESTOLDPWD/unit/unit-test" || skip
+	"$TESTOLDPWD/unit/unit-test" "${@}"
+}
+
 mirror_recovery_works() {
 	case "$(uname -r)" in
 	  3.3.4-5.fc17.i686|3.3.4-5.fc17.x86_64) return 1 ;;
@ -1472,6 +1526,10 @@ driver_at_least() {
 }

 have_thin() {
+	lvm segtypes 2>/dev/null | grep -q thin$ || {
+		echo "Thin is not built-in." >&2
+		return 1
+	}
 	target_at_least dm-thin-pool "$@"

 	declare -a CONF=()
@ -1512,9 +1570,9 @@ have_raid4 () {
 }

 have_cache() {
-	test "$CACHE" = shared -o "$CACHE" = internal || {
+	lvm segtypes 2>/dev/null | grep -q cache$ || {
 		echo "Cache is not built-in." >&2
-		return 1;
+		return 1
 	}
 	target_at_least dm-cache "$@"

--- a/test/lib/check.sh
+++ b/test/lib/check.sh
@ -422,10 +422,11 @@ sysfs() {
 	# read maj min and also convert hex to decimal
 	local maj
 	local min
-	local P="/sys/dev/block/$maj:$min/$2"
+	local P
 	local val
 	maj=$(($(stat -L --printf=0x%t "$1")))
 	min=$(($(stat -L --printf=0x%T "$1")))
+	P="/sys/dev/block/$maj:$min/$2"
 	val=$(< "$P") || return 0 # no sysfs ?
 	test "$val" -eq "$3" || \
 		die "$1: $P = $val differs from expected value $3!"
--- a/test/lib/inittest.sh
+++ b/test/lib/inittest.sh
@ -47,13 +47,14 @@ SKIP_WITH_LVMETAD=${SKIP_WITH_LVMETAD-}

 SKIP_WITH_LVMPOLLD=${SKIP_WITH_LVMPOLLD-}
 SKIP_WITH_LVMLOCKD=${SKIP_WITH_LVMLOCKD-}
+SKIP_ROOT_DM_CHECK=${SKIP_ROOT_DM_CHECK-}

 if test -n "$LVM_TEST_FLAVOUR"; then
 	. "lib/flavour-$LVM_TEST_FLAVOUR"
 fi

 test -n "$SKIP_WITHOUT_CLVMD" && test "$LVM_TEST_LOCKING" -ne 3 && initskip
-test -n "$SKIP_WITH_CLVMD" && test "$LVM_TEST_LOCKING" -eq 3 && initskip
+test -n "$SKIP_WITH_CLVMD" && test "$LVM_TEST_LOCKING" = 3 && initskip

 test -n "$SKIP_WITHOUT_LVMETAD" && test -z "$LVM_TEST_LVMETAD" && initskip
 test -n "$SKIP_WITH_LVMETAD" && test -n "$LVM_TEST_LVMETAD" && initskip
@ -75,7 +76,9 @@ COMMON_PREFIX="LVMTEST"
 PREFIX="${COMMON_PREFIX}$$"

 # Check we are not conflickting with some exiting setup
-dmsetup table | not grep "${PREFIX}[^0-9]" || die "DM table already has devices with prefix $PREFIX!"
+if test -z "$SKIP_ROOT_DM_CHECK" ; then
+	dmsetup table | not grep "${PREFIX}[^0-9]" || die "DM table already has devices with prefix $PREFIX!"
+fi

 if test -z "$LVM_TEST_DIR"; then LVM_TEST_DIR=$TMPDIR; fi
 TESTDIR=$(mkdtemp "${LVM_TEST_DIR:-/tmp}" "$PREFIX.XXXXXXXXXX") || \
@ -88,8 +91,13 @@ LVM_LOG_FILE_MAX_LINES=${LVM_LOG_FILE_MAX_LINES-1000000}
 LVM_EXPECTED_EXIT_STATUS=1
 export LVM_LOG_FILE_EPOCH LVM_LOG_FILE_MAX_LINES LVM_EXPECTED_EXIT_STATUS

-test -n "$BASH" && trap 'set +vx; STACKTRACE; set -vx' ERR
-trap 'aux teardown' EXIT # don't forget to clean up
+if test -z "$SKIP_ROOT_DM_CHECK" ; then
+	# Teardown only with root
+	test -n "$BASH" && trap 'set +vx; STACKTRACE; set -vx' ERR
+	trap 'aux teardown' EXIT # don't forget to clean up
+else
+	# SKIP_ROOT_DM_CHECK is set: only remove the test directory on exit.
+	# The path is quoted so a $TESTOLDPWD containing whitespace still works.
+	trap 'cd "$TESTOLDPWD"; rm -rf "${TESTDIR:?}"' EXIT
+fi

 cd "$TESTDIR"
 mkdir lib
@ -114,7 +122,7 @@ mkdir "$LVM_SYSTEM_DIR" "$DM_DEV_DIR"
 if test -n "$LVM_TEST_DEVDIR" ; then
 	test -d "$LVM_TEST_DEVDIR" || die "Test device directory LVM_TEST_DEVDIR=\"$LVM_TEST_DEVDIR\" is not valid."
 	DM_DEV_DIR=$LVM_TEST_DEVDIR
-else
+elif test -z "$SKIP_ROOT_DM_CHECK" ; then
 	mknod "$DM_DEV_DIR/testnull" c 1 3 || die "mknod failed"
 	echo >"$DM_DEV_DIR/testnull" || \
 		die "Filesystem does support devices in $DM_DEV_DIR (mounted with nodev?)"
@ -151,7 +159,7 @@ if test -n "$LVM_TEST_LVMETAD" ; then
 	export LVM_LVMETAD_SOCKET="$TESTDIR/lvmetad.socket"
 	export LVM_LVMETAD_PIDFILE="$TESTDIR/lvmetad.pid"
 	aux prepare_lvmetad
-else
+elif test -z "$SKIP_ROOT_DM_CHECK" ; then
 	# lvmetad prepares its own lvmconf
 	export LVM_LVMETAD_PIDFILE="$TESTDIR/non-existing-file"
 	aux lvmconf
--- a/test/shell/fsadm-crypt.sh
+++ b/test/shell/fsadm-crypt.sh
@ -14,6 +14,10 @@ test_description='Exercise fsadm filesystem resize on crypt devices'
 SKIP_WITH_LVMLOCKD=1
 SKIP_WITH_LVMPOLLD=1

+# FIXME: cannot use brd (ramdisk)  - lsblk is NOT listing it
+# so lsblk usage should be replaced
+export LVM_TEST_PREFER_BRD=0
+
 . lib/inittest

 aux prepare_vg 1 300
--- a/test/shell/fsadm-renamed.sh
+++ b/test/shell/fsadm-renamed.sh
@ -70,7 +70,7 @@ lvcreate -n $lv1 -L20M $vg

 case "$i" in
 *ext3)		MKFS_ARGS="-b1024 -j" ;;
-*xfs)		MKFS_ARGS="-l internal,size=1000b -f" ;;
+*xfs)		MKFS_ARGS="-l internal,size=1700b -f" ;;
 *reiserfs)	MKFS_ARGS="-s 513 -f" ;;
 esac

--- a/test/shell/lvconvert-mirror.sh
+++ b/test/shell/lvconvert-mirror.sh
@ -17,7 +17,7 @@ export LVM_TEST_LVMETAD_DEBUG_OPTS=${LVM_TEST_LVMETAD_DEBUG_OPTS-}

 . lib/inittest

-aux prepare_pvs 5 100
+aux prepare_pvs 5
 get_devs

 # proper DEVRANGE needs to be set according to extent size
@ -320,9 +320,13 @@ fi
 aux zero_dev "$dev2" $(get first_extent_sector "$dev2"):
 aux zero_dev "$dev4" $(get first_extent_sector "$dev4"):

+SHOULD=
+aux throttle_dm_mirror || SHOULD=should
+
 # Use large enough mirror that takes time to sychronize with small regionsize
-lvcreate -aey -L80 -Zn -Wn --type mirror --regionsize 16k -m2 -n $lv1 $vg "$dev1" "$dev2" "$dev4" "$dev3:$DEVRANGE"
-not lvconvert -m-1 $vg/$lv1 "$dev1" 2>&1 | tee out
+lvcreate -aey -L20 -Zn -Wn --type mirror --regionsize 16k -m2 -n $lv1 $vg "$dev1" "$dev2" "$dev4" "$dev3:$DEVRANGE"
+$SHOULD not lvconvert -m-1 $vg/$lv1 "$dev1" 2>&1 | tee out
+aux restore_dm_mirror
 grep "not in-sync" out

 lvconvert $vg/$lv1 # wait
@ -334,9 +338,11 @@ check linear $vg $lv1
 check lv_on $vg $lv1 "$dev4"
 lvremove -ff $vg

+
+aux throttle_dm_mirror || :
 # No parallel lvconverts on a single LV please
 # Use big enough mirror size and small regionsize to run on all test machines succesfully
-lvcreate -aey -Zn -Wn -L80 --type mirror --regionsize 16k -m1 -n $lv1 $vg "$dev1" "$dev2" "$dev3:0-8"
+lvcreate -aey -Zn -Wn -L20 --type mirror --regionsize 16k -m1 -n $lv1 $vg "$dev1" "$dev2" "$dev3:0-8"
 check mirror $vg $lv1
 check mirror_legs $vg $lv1 2

@ -344,7 +350,8 @@ LVM_TEST_TAG="kill_me_$PREFIX" lvconvert -m+1 -b $vg/$lv1 "$dev4"
 # ATM upconversion should be running

 # Next convert should fail b/c we can't have 2 at once
-not lvconvert -m+1 $vg/$lv1 "$dev5"  2>&1 | tee out
+$SHOULD not lvconvert -m+1 $vg/$lv1 "$dev5"  2>&1 | tee out
+aux restore_dm_mirror
 grep "is already being converted" out

 lvconvert $vg/$lv1 # wait
@ -353,10 +360,6 @@ check mirror_no_temporaries $vg $lv1
 check mirror_legs $vg $lv1 3
 lvremove -ff $vg

-lvs -a $vg
-dmsetup table
-losetup -a
-ls -lRa $PWD

 # "rhbz440405: lvconvert -m0 incorrectly fails if all PEs allocated"
 lvcreate -aey -l "$(get pv_field "$dev1" pe_count)" --type mirror -m1 -n $lv1 $vg "$dev1" "$dev2" "$dev3:$DEVRANGE"
@ -366,5 +369,4 @@ lvconvert -m0 $vg/$lv1 "$dev1"
 check linear $vg $lv1
 lvremove -ff $vg

-
 vgremove -ff $vg
--- a/test/shell/lvconvert-raid-reshape.sh
+++ b/test/shell/lvconvert-raid-reshape.sh
@ -18,7 +18,7 @@ LVM_SKIP_LARGE_TESTS=0
 . lib/inittest

 which mkfs.ext4 || skip
-aux have_raid 1 13 1 || skip # needed to address RHBZ#1501145
+aux have_raid 1 13 99 || skip # needed to address RHBZ#1501145

 # Temporarily skip reshape tests on single-core CPUs until there's a fix for
 # https://bugzilla.redhat.com/1443999 - AGK 2017/04/20
--- a/test/shell/lvconvert-snapshot.sh
+++ b/test/shell/lvconvert-snapshot.sh
@ -20,7 +20,7 @@ SKIP_WITH_LVMPOLLD=1
 aux prepare_pvs 2
 get_devs

-vgcreate -s 1k "$vg" "${DEVICES[@]}"
+vgcreate -s 4k "$vg" "${DEVICES[@]}"

 lvcreate --type snapshot -V50 -L1 -n $lv1 -s $vg

--- a/test/shell/lvcreate-small-snap.sh
+++ b/test/shell/lvcreate-small-snap.sh
@ -18,20 +18,20 @@ SKIP_WITH_LVMPOLLD=1
 aux prepare_pvs
 get_devs

-vgcreate -s 1k "$vg" "${DEVICES[@]}"
+vgcreate -s 4k "$vg" "${DEVICES[@]}"

 # 3 Chunks
 lvcreate -aey -n one -l 10 $vg
-lvcreate -s -l 12 -n snapA $vg/one
-lvcreate -s -c 4k -l 12 -n snapX1 $vg/one
-lvcreate -s -c 8k -l 24 -n snapX2 $vg/one
+lvcreate -s -l 3 -n snapA $vg/one
+lvcreate -s -c 4k -l 3 -n snapX1 $vg/one
+lvcreate -s -c 8k -l 6 -n snapX2 $vg/one

 # Check that snapshots that are too small are caught with correct error.
-not lvcreate -s -c 8k -l 8 -n snapX3 $vg/one 2>&1 | tee lvcreate.out
+not lvcreate -s -c 8k -l 2 -n snapX3 $vg/one 2>&1 | tee lvcreate.out
 not grep "suspend origin one" lvcreate.out
 grep "smaller" lvcreate.out

-not lvcreate -s -l 4 -n snapB $vg/one 2>&1 | tee lvcreate.out
+not lvcreate -s -l 1 -n snapB $vg/one 2>&1 | tee lvcreate.out
 not grep "suspend origin one" lvcreate.out
 grep "smaller" lvcreate.out

--- a/test/shell/lvmetad-disabled.sh
+++ b/test/shell/lvmetad-disabled.sh
@ -19,7 +19,11 @@ SKIP_WITH_LVMPOLLD=1
 aux prepare_devs 2

 kill "$(< LOCAL_LVMETAD)"
-while test -e "$TESTDIR/lvmetad.socket"; do echo -n .; sleep .1; done # wait for the socket close
+for i in {200..0} ; do
+	test -e "$TESTDIR/lvmetad.socket" || break
+	test "$i" -eq 0 && die "Too slow closing of lvmetad.socket. Aborting test."
+	echo -n .; sleep .1;
+done # wait for the socket close
 test ! -e "$LVM_LVMETAD_PIDFILE"

 aux lvmconf "global/use_lvmetad = 0"
--- a/test/shell/mirror-names.sh
+++ b/test/shell/mirror-names.sh
@ -51,18 +51,6 @@ lv_convert_lv_() {
 	get lv_field "$1" convert_lv | tr -d []
 }

-enable_devs() {
-	for i in "$dev1" "$dev2" "$dev3" "$dev4" "$dev5" ; do
-		aux enable_dev "$i"
-	done
-}
-
-delay_devs() {
-	for i in "$dev1" "$dev2" "$dev3" "$dev4" "$dev5" ; do
-		aux delay_dev "$i" 0 1000 "$(get first_extent_sector "$i"):"
-	done
-}
-
 # ---------------------------------------------------------------------
 # Common environment setup/cleanup for each sub testcases

@ -122,14 +110,14 @@ check_and_cleanup_lvs_

 #COMM "converting mirror names is ${lv1}_mimagetmp_2"
 lvcreate -aey -l2 --type mirror -m1 -n $lv1 $vg
-delay_devs
+# Use a large enough polling interval so the mirror keeps mimagetmp
 LVM_TEST_TAG="kill_me_$PREFIX" lvconvert -m+1 -i+40 -b $vg/$lv1
 convlv=$(lv_convert_lv_ $vg/$lv1)
 test "$convlv" = "${lv1}_mimagetmp_2"
 lv_devices_ $vg/$lv1 $convlv ${lv1}_mimage_2
 lv_devices_ $vg/$convlv ${lv1}_mimage_0 ${lv1}_mimage_1
 lv_mirror_log_ $vg/$convlv ${lv1}_mlog
-enable_devs
+check lv_exists $vg ${lv1}_mimagetmp_2

 #COMM "mirror log name after re-adding is ${lv1}_mlog"
 lvconvert -f --mirrorlog core $vg/$lv1
--- a/test/shell/pvmove-abort-all.sh
+++ b/test/shell/pvmove-abort-all.sh
@ -20,19 +20,21 @@ export DM_ABORT_ON_INTERNAL_ERRORS=0

 . lib/inittest

+aux lvmconf 'activation/raid_region_size = 16'
+
+aux target_at_least dm-mirror 1 10 0 || skip
+# Throttle mirroring
+aux throttle_dm_mirror || skip
+
 aux prepare_pvs 6 60

-vgcreate -s 128k $vg "$dev1" "$dev2"
+vgcreate -s 512k $vg "$dev1" "$dev2"
 pvcreate --metadatacopies 0 "$dev3"
 vgextend $vg "$dev3"
-vgcreate -s 128k $vg1 "$dev4" "$dev5"
+vgcreate -s 512k $vg1 "$dev4" "$dev5"
 pvcreate --metadatacopies 0 "$dev6"
 vgextend $vg1 "$dev6"

-# Slowdown writes
-aux delay_dev "$dev3" 0 800 "$(get first_extent_sector "$dev3"):"
-aux delay_dev "$dev6" 0 800 "$(get first_extent_sector "$dev6"):"
-
 for mode in "--atomic" "" ;
 do
 for backgroundarg in "-b" "" ;
@ -48,7 +50,6 @@ cmd1=(pvmove -i1 $backgroundarg $mode "$dev1" "$dev3")
 cmd2=(pvmove -i1 $backgroundarg $mode "$dev2" "$dev3")
 cmd3=(pvmove -i1 $backgroundarg $mode -n $vg1/$lv1 "$dev4" "$dev6")

-if test -e HAVE_DM_DELAY; then

 if test -z "$backgroundarg" ; then
 	"${cmd1[@]}" &
@ -64,8 +65,6 @@ else
 	LVM_TEST_TAG="kill_me_$PREFIX" "${cmd3[@]}"
 fi

-fi
-
 # test removal of all pvmove LVs
 pvmove --abort

@ -82,7 +81,7 @@ aux kill_tagged_processes
 done
 done

-# Restore delayed device back
-aux enable_dev "$dev3" "$dev6"
+# Restore throttling
+aux restore_dm_mirror

 vgremove -ff $vg $vg1
--- a/test/shell/pvmove-abort.sh
+++ b/test/shell/pvmove-abort.sh
@ -16,15 +16,18 @@ SKIP_WITH_LVMLOCKD=1

 . lib/inittest

+aux lvmconf 'activation/raid_region_size = 16'
+
+aux target_at_least dm-mirror 1 10 0 || skip
+# Throttle mirroring
+aux throttle_dm_mirror || skip
+
 aux prepare_pvs 3 60

-vgcreate -s 128k $vg "$dev1" "$dev2"
+vgcreate -s 512k $vg "$dev1" "$dev2"
 pvcreate --metadatacopies 0 "$dev3"
 vgextend $vg "$dev3"

-# Slowdown read/writes
-aux delay_dev "$dev3" 0 800 "$(get first_extent_sector "$dev3"):"
-
 for mode in "--atomic" "" ;
 do
 for backgroundarg in "-b" "" ;
@ -32,13 +35,11 @@ do

 # Create multisegment LV
 lvcreate -an -Zn -l30 -n $lv1 $vg "$dev1"
-lvcreate -an -Zn -l30 -n $lv2 $vg "$dev2"
+lvcreate -an -Zn -l40 -n $lv2 $vg "$dev2"

 cmd1=(pvmove -i1 $backgroundarg $mode "$dev1" "$dev3")
 cmd2=(pvmove -i1 $backgroundarg $mode "$dev2" "$dev3")

-if test -e HAVE_DM_DELAY; then
-
 if test -z "$backgroundarg" ; then
 	"${cmd1[@]}" &
 	aux wait_pvmove_lv_ready "$vg-pvmove0"
@ -57,8 +58,6 @@ get lv_field $vg name -a | tee out
 not grep -E "^\[?pvmove0" out
 grep -E "^\[?pvmove1" out

-fi
-
 # remove any remaining pvmoves in progress
 pvmove --abort

@ -69,7 +68,7 @@ aux kill_tagged_processes
 done
 done

-# Restore delayed device back
-aux enable_dev "$dev3"
+# Restore throttling
+aux restore_dm_mirror

 vgremove -ff $vg
--- a/test/shell/snapshot-maxsize.sh
+++ b/test/shell/snapshot-maxsize.sh
@ -21,7 +21,7 @@ SKIP_WITH_LVMPOLLD=1
 aux prepare_pvs 1
 get_devs

-vgcreate -s 1K "$vg" "${DEVICES[@]}"
+vgcreate -s 4K "$vg" "${DEVICES[@]}"

 lvcreate -aey -L1 -n $lv1 $vg
 # Snapshot should be large enough to handle any writes
--- a/test/shell/snapshot-usage.sh
+++ b/test/shell/snapshot-usage.sh
@ -27,7 +27,7 @@ fill() {

 cleanup_tail()
 {
-	test -z "$SLEEP_PID" || kill $SLEEP_PID || true
+	test -z "${SLEEP_PID-}" || kill $SLEEP_PID || true
 	wait
 	vgremove -ff $vg1 || true
 	vgremove -ff $vg
@ -83,8 +83,7 @@ aux lvmconf "activation/snapshot_autoextend_percent = 20" \
 # Check usability with smallest (1k) extent size ($lv has 15P)
 pvcreate --yes --setphysicalvolumesize 4T "$DM_DEV_DIR/$vg/$lv"
 trap 'cleanup_tail' EXIT
-vgcreate -s 1K $vg1 "$DM_DEV_DIR/$vg/$lv"
-
+vgcreate -s 4K $vg1 "$DM_DEV_DIR/$vg/$lv"

 # Play with small 1k 128 extents
 lvcreate -aey -L128K -n $lv $vg1
@ -135,29 +134,30 @@ check lv_not_exists $vg1 $lv1
 # Check border size
 lvcreate -aey -L4095G $vg1
 lvcreate -s -L100K $vg1/lvol0
-fill 1K
+fill 4K
 check lv_field $vg1/lvol1 data_percent "12.00"

 lvremove -ff $vg1

-# Create 1KB snapshot, does not need to be active here
+# Create 4KB snapshot, does not need to be active here
 lvcreate -an -Zn -l1 -n $lv1 $vg1
 not lvcreate -s -l1 $vg1/$lv1
-not lvcreate -s -l3 $vg1/$lv1
+# snapshot cannot be smaller than 3 chunks (12K)
+not lvcreate -s -l2 $vg1/$lv1
 lvcreate -s -l30 -n $lv2 $vg1/$lv1
 check lv_field $vg1/$lv2 size "$EXPECT1"

-not lvcreate -s -c512 -l512 $vg1/$lv1
+not lvcreate -s -c512 -l128 $vg1/$lv1
 lvcreate -s -c128 -l1700 -n $lv3 $vg1/$lv1
 # 3 * 128
 check lv_field $vg1/$lv3 size "$EXPECT2"
 lvremove -ff $vg1

-lvcreate -aey -l20 $vg1
-lvcreate -s -l12 $vg1/lvol0
+lvcreate -aey -l5 $vg1
+lvcreate -s -l3 $vg1/lvol0

-# Fill 1KB -> 100% snapshot (1x 4KB chunk)
-fill 1K
+# Fill 4KB -> 100% snapshot (1x 4KB chunk)
+fill 4K
 check lv_field $vg1/lvol1 data_percent "100.00"

 # Check it resizes 100% full valid snapshot to fit threshold
@ -168,7 +168,7 @@ fill 4K
 lvextend --use-policies $vg1/lvol1
 check lv_field $vg1/lvol1 size "24.00k"

-lvextend -l+33 $vg1/lvol1
+lvextend -l+8 $vg1/lvol1
 check lv_field $vg1/lvol1 size "$EXPECT3"

 fill 20K
@ -189,7 +189,7 @@ lvremove -f $vg1/snap
 # Undeleted header would trigger attempt to access
 # beyond end of COW device
 # Fails to create when chunk size is different
-lvcreate -s -pr -l12 -n snap $vg1/$lv
+lvcreate -s -pr -l3 -n snap $vg1/$lv

 # When header is undelete, fails to read snapshot without read errors
 #dd if="$DM_DEV_DIR/$vg1/snap" of=/dev/null bs=1M count=2
--- a/test/shell/thin-merge.sh
+++ b/test/shell/thin-merge.sh
@ -22,6 +22,7 @@ MKFS=mkfs.ext2
 which $MKFS  || skip
 which fsck || skip

+MKFS="$MKFS -b4096"
 #
 # Main
 #
--- a/test/shell/topology-support.sh
+++ b/test/shell/topology-support.sh
@ -23,7 +23,7 @@ lvdev_() {

 test_snapshot_mount() {
    lvcreate -aey -L4M -n $lv1 $vg "$dev1"
-    mkfs.ext3 "$(lvdev_ $vg $lv1)"
+    mkfs.ext3 -b4096 "$(lvdev_ $vg $lv1)"
    mkdir test_mnt
    mount "$(lvdev_ $vg $lv1)" test_mnt
    lvcreate -L4M -n $lv2 -s $vg/$lv1
--- a/test/shell/vgcreate-usage.sh
+++ b/test/shell/vgcreate-usage.sh
@ -73,10 +73,10 @@ not vgcreate $vg "$dev3"

 # Test default (4MB) vg_extent_size as well as limits of extent_size
 not vgcreate --physicalextentsize 0k $vg "$dev1" "$dev2"
-vgcreate --physicalextentsize 1k $vg "$dev1" "$dev2"
-check vg_field $vg vg_extent_size 1.00k
+vgcreate --physicalextentsize 4k $vg "$dev1" "$dev2"
+check vg_field $vg vg_extent_size 4.00k
 vgremove -ff $vg
-not vgcreate --physicalextentsize 3K $vg "$dev1" "$dev2"
+not vgcreate --physicalextentsize 7K $vg "$dev1" "$dev2"
 not vgcreate --physicalextentsize 1024t $vg "$dev1" "$dev2"
 #not vgcreate --physicalextentsize 1T $vg "$dev1" "$dev2"
 # FIXME: vgcreate allows physicalextentsize larger than pv size!
--- a/test/unit/Makefile.in
+++ b/test/unit/Makefile.in
@ -34,10 +34,10 @@ UNIT_OBJECTS=$(UNIT_SOURCE:%.c=%.o)
 CLEAN_TARGETS+=$(UNIT_DEPENDS) $(UNIT_OBJECTS)
 UNIT_LDLIBS += $(LVMINTERNAL_LIBS) -laio

-test/unit/unit-test: $(UNIT_OBJECTS) device_mapper/libdevice-mapper.a lib/liblvm-internal.a
+test/unit/unit-test: $(UNIT_OBJECTS) lib/liblvm-internal.a device_mapper/libdevice-mapper.a
 	@echo "    [LD] $@"
-	$(Q) $(CC) $(CFLAGS) $(LDFLAGS) $(EXTRA_EXEC_LDFLAGS) -L$(top_builddir)/libdm \
-	      -o $@ $(UNIT_OBJECTS) $(UNIT_LDLIBS)
+	$(Q) $(CC) $(CFLAGS) $(LDFLAGS) $(EXTRA_EXEC_LDFLAGS) \
+	      -o $@ $+ $(UNIT_LDLIBS) -lm

 .PHONEY: run-unit-test
 run-unit-test: test/unit/unit-test
--- a/test/unit/bcache_t.c
+++ b/test/unit/bcache_t.c
@ -504,7 +504,7 @@ static void test_prefetch_issues_a_read(void *context)
 		_expect_read(me, fd, i);
 		bcache_prefetch(cache, fd, i);
 	}
-
+	_no_outstanding_expectations(me);

 	for (i = 0; i < nr_cache_blocks; i++) {
 		_expect(me, E_WAIT);
@ -810,6 +810,45 @@ static void test_invalidate_held_block(void *context)
 	bcache_put(b);
 }

+//----------------------------------------------------------------
+// Chasing a bug reported by dct
+
+static void _cycle(struct fixture *f, unsigned nr_cache_blocks)
+{
+	struct mock_engine *me = f->me;
+	struct bcache *cache = f->cache;
+
+	unsigned i;
+	struct block *b;
+
+	for (i = 0; i < nr_cache_blocks; i++) {
+		// prefetch should not wait
+		_expect_read(me, i, 0);
+		bcache_prefetch(cache, i, 0);
+	}
+
+	// This double checks the reads occur in response to the prefetch
+	_no_outstanding_expectations(me);
+
+	for (i = 0; i < nr_cache_blocks; i++) {
+		_expect(me, E_WAIT);
+		T_ASSERT(bcache_get(cache, i, 0, 0, &b));
+		bcache_put(b);
+	}
+
+	_no_outstanding_expectations(me);
+}
+
+static void test_concurrent_reads_after_invalidate(void *context)
+{
+	struct fixture *f = context;
+	unsigned i, nr_cache_blocks = 16;
+
+	_cycle(f, nr_cache_blocks);
+	for (i = 0; i < nr_cache_blocks; i++)
+        	bcache_invalidate_fd(f->cache, i);
+        _cycle(f, nr_cache_blocks);
+}

 /*----------------------------------------------------------------
 * Top level
@ -859,6 +898,8 @@ static struct test_suite *_small_tests(void)
 	T("invalidate-read-error", "invalidate a block that errored", test_invalidate_after_read_error);
 	T("invalidate-write-error", "invalidate a block that errored", test_invalidate_after_write_error);
 	T("invalidate-fails-in-held", "invalidating a held block fails", test_invalidate_held_block);
+	T("concurrent-reads-after-invalidate", "prefetch should still issue concurrent reads after invalidate",
+          test_concurrent_reads_after_invalidate);

 	return ts;
 }
--- a/test/unit/bcache_utils_t.c
+++ b/test/unit/bcache_utils_t.c
@ -14,6 +14,10 @@

 #define _GNU_SOURCE

+#include "lib/device/bcache.h"
+#include "framework.h"
+#include "units.h"
+
 #include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
@ -21,10 +25,7 @@
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <unistd.h>
-
-#include "lib/device/bcache.h"
-#include "framework.h"
-#include "units.h"
+#include <sys/statvfs.h>

 //----------------------------------------------------------------

@ -53,11 +54,21 @@ static void *_fix_init(struct io_engine *engine)
        uint8_t buffer[T_BLOCK_SIZE];
        struct fixture *f = malloc(sizeof(*f));
        unsigned b, i;
+	struct statvfs fsdata;
+	static int _runs_is_tmpfs = -1;
+
+	if (_runs_is_tmpfs == -1) {
+		// When testing in a tmpfs directory, O_DIRECT cannot be used
+		// tmpfs has f_fsid == 0 (unsure whether this is the best heuristic)
+		_runs_is_tmpfs = (statvfs(".", &fsdata) == 0 && !fsdata.f_fsid) ? 1 : 0;
+		if (_runs_is_tmpfs)
+			printf("  Running test in tmpfs, *NOT* using O_DIRECT\n");
+	}

        T_ASSERT(f);

        snprintf(f->fname, sizeof(f->fname), "unit-test-XXXXXX");
-	f->fd = mkostemp(f->fname, O_RDWR | O_CREAT | O_EXCL);
+	f->fd = mkstemp(f->fname);
 	T_ASSERT(f->fd >= 0);

 	for (b = 0; b < NR_BLOCKS; b++) {
@ -65,11 +76,13 @@ static void *_fix_init(struct io_engine *engine)
                	buffer[i] = _pattern_at(INIT_PATTERN, byte(b, i));
 		T_ASSERT(write(f->fd, buffer, T_BLOCK_SIZE) > 0);
 	}
-	close(f->fd);

-	// reopen with O_DIRECT
-	f->fd = open(f->fname, O_RDWR | O_DIRECT);
-	T_ASSERT(f->fd >= 0);
+	if (!_runs_is_tmpfs) {
+		close(f->fd);
+		// reopen with O_DIRECT
+		f->fd = open(f->fname, O_RDWR | O_DIRECT);
+		T_ASSERT(f->fd >= 0);
+	}

 	f->cache = bcache_create(T_BLOCK_SIZE / 512, NR_BLOCKS, engine);
 	T_ASSERT(f->cache);
--- a/test/unit/io_engine_t.c
+++ b/test/unit/io_engine_t.c
@ -89,13 +89,14 @@ static void *_fix_init(void)
        	test_fail("posix_memalign failed");

        snprintf(f->fname, sizeof(f->fname), "unit-test-XXXXXX");
-	f->fd = mkostemp(f->fname, O_RDWR | O_CREAT | O_EXCL);
+	f->fd = mkstemp(f->fname);
 	T_ASSERT(f->fd >= 0);

 	_fill_buffer(f->data, 123, SECTOR_SIZE * BLOCK_SIZE_SECTORS);

-	write(f->fd, f->data, SECTOR_SIZE * BLOCK_SIZE_SECTORS);
-	lseek(f->fd, 0, SEEK_SET);
+	T_ASSERT(write(f->fd, f->data, SECTOR_SIZE * BLOCK_SIZE_SECTORS) > 0);
+	T_ASSERT(lseek(f->fd, 0, SEEK_SET) != -1);
+
        return f;
 }

--- a/test/unit/unit-test.sh
+++ b/test/unit/unit-test.sh
@ -0,0 +1,23 @@
+#!/bin/sh
+# Copyright (C) 2018 Red Hat, Inc. All rights reserved.
+#
+# This file is part of LVM2.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+SKIP_WITH_LVMLOCKD=1
+SKIP_WITH_LVMPOLLD=1
+SKIP_WITH_LVMETAD=1
+SKIP_WITH_CLVMD=1
+
+SKIP_ROOT_DM_CHECK=1
+
+. lib/inittest
+
+aux unittest run
--- a/tools/polldaemon.c
+++ b/tools/polldaemon.c
@ -80,6 +80,8 @@ static int _check_lv_status(struct cmd_context *cmd,
 	}

 	progress = parms->poll_fns->poll_progress(cmd, lv, name, parms);
+	fflush(stdout);
+
 	if (progress == PROGRESS_CHECK_FAILED)
 		return_0;

@ -134,7 +136,6 @@ static void _sleep_and_rescan_devices(struct cmd_context *cmd, struct daemon_par
 		 */
 		lvmcache_destroy(cmd, 1, 0);
 		label_scan_destroy(cmd);
-		dev_close_all();
 		_nanosleep(parms->interval, 1);
 		lvmcache_label_scan(cmd);
 	}
@ -449,6 +450,7 @@ static int _report_progress(struct cmd_context *cmd, struct poll_operation_id *i
 		ret = 0;
 		goto out;
 	}
+	fflush(stdout);

 	ret = 1;

@ -530,9 +532,6 @@ static void _lvmpolld_poll_for_all_vgs(struct cmd_context *cmd,
 				_report_progress(cmd, idl->id, lpdp.parms);
 		}

-		if (lpdp.parms->interval)
-			dev_close_all();
-
 		_nanosleep(lpdp.parms->interval, 0);
 	}

@ -559,9 +558,6 @@ static int _lvmpoll_daemon(struct cmd_context *cmd, struct poll_operation_id *id
 				    (!parms->aborting && !(r = _report_progress(cmd, id, parms))))
 					break;

-				if (parms->interval)
-					dev_close_all();
-
 				_nanosleep(parms->interval, 0);
 			}
 		}
@ -620,7 +616,6 @@ static int _poll_daemon(struct cmd_context *cmd, struct poll_operation_id *id,
 	/* clear lvmcache/bcache/fds from the parent */
 	lvmcache_destroy(cmd, 1, 0);
 	label_scan_destroy(cmd);
-	dev_close_all();

 	if (id) {
 		if (!wait_for_single_lv(cmd, id, parms)) {
--- a/tools/pvck.c
+++ b/tools/pvck.c
@ -25,17 +25,8 @@ int pvck(struct cmd_context *cmd, int argc, char **argv)
 	int i;
 	int ret_max = ECMD_PROCESSED;

-	/* FIXME: validate cmdline options */
-	/* FIXME: what does the cmdline look like? */
-
 	labelsector = arg_uint64_value(cmd, labelsector_ARG, UINT64_C(0));

-	if (labelsector) {
-		/* FIXME: see label_read_sector */
-		log_error("TODO: reading label from non-zero sector");
-		return ECMD_FAILED;
-	}
-
 	dm_list_init(&devs);

 	for (i = 0; i < argc; i++) {
@ -61,6 +52,25 @@ int pvck(struct cmd_context *cmd, int argc, char **argv)
 	label_scan_devs(cmd, cmd->filter, &devs);

 	dm_list_iterate_items(devl, &devs) {
+
+		/*
+		 * The scan above will populate lvmcache with any info from the
+		 * standard locations at the start of the device.  Now populate
+		 * lvmcache with any info from non-standard offsets.
+		 *
+		 * FIXME: is it possible for a real lvm label sector to be
+		 * anywhere other than the first four sectors of the disk?
+		 * If not, drop the code in label_read_sector/find_lvm_header
+		 * that supports searching at any sector.
+		 */
+		if (labelsector) {
+			if (!label_read_sector(devl->dev, labelsector)) {
+				stack;
+				ret_max = ECMD_FAILED;
+				continue;
+			}
+		}
+
 		if (!pv_analyze(cmd, devl->dev, labelsector)) {
 			stack;
 			ret_max = ECMD_FAILED;
--- a/tools/toollib.c
+++ b/tools/toollib.c
@ -115,7 +115,6 @@ int become_daemon(struct cmd_context *cmd, int skip_lvm)
 			/* FIXME Clean up properly here */
 			_exit(ECMD_FAILED);
 	}
-	dev_close_all();

 	return 1;
 }
@ -1559,7 +1558,7 @@ int process_each_label(struct cmd_context *cmd, int argc, char **argv,
 			/*
 			 * add info to lvmcache from the duplicate dev.
 			 */
-			label_read(devl->dev, NULL, 0);
+			label_read(devl->dev);

 			/*
 			 * the info/label should now be found because