dmstats: separate report and sample clocks

Maintain separate timestamps for the sampling interval and the report waits, and correct the sleep interval to account for the time spent collecting and processing stats.
dmstats: add libdm-stats library and 'dmsetup stats' command
2025-10-25 03:33:16 +03:00 · 2015-08-08 23:59:06 +01:00 · 2015-08-08 23:39:22 +01:00 · 2015-08-08 17:48:30 +01:00 · 2015-08-08 17:48:30 +01:00 · 2015-08-08 11:48:12 +01:00
595 changed files with 64538 additions and 14309 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,13 +1,16 @@
 *.5
+*.7
 *.8
 *.a
 *.d
 *.o
+*.orig
 *.pc
 *.pot
+*.rej
 *.so
 *.so.*
-*.swp
+*.sw*
 *~

 .export.sym
@@ -17,11 +20,11 @@
 Makefile
 make.tmpl

-configure.h
-version.h
-
 /autom4te.cache/
+/autoscan.log
 /config.log
 /config.status
+/configure.scan
 /cscope.out
+/tags
 /tmp/
--- a/Makefile.in
+++ b/Makefile.in
@@ -1,6 +1,6 @@
 #
 # Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
-# Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved.
+# Copyright (C) 2004-2015 Red Hat, Inc. All rights reserved.
 #
 # This file is part of LVM2.
 #
@@ -15,6 +15,8 @@
 srcdir = @srcdir@
 top_srcdir = @top_srcdir@
 top_builddir = @top_builddir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@

 SUBDIRS = conf daemons include lib libdaemon libdm man scripts tools

@@ -91,10 +93,44 @@ cscope.out:
 all: cscope.out
 endif
 DISTCLEAN_TARGETS += cscope.out
+CLEAN_DIRS += autom4te.cache

-check check_system check_cluster check_local check_lvmetad unit: all
+check check_system check_cluster check_local check_lvmetad check_lvmpolld unit: all
 	$(MAKE) -C test $(@)

+conf.generate: tools
+
+# A literal '(' cannot be written directly inside a make function call; keep it in a variable instead.
+leftparen:=(
+LVM_VER := $(firstword $(subst $(leftparen), ,$(LVM_VERSION)))
+VER := LVM2.$(LVM_VER)
+# release file name
+FILE_VER := $(VER).tgz
+CLEAN_TARGETS += $(FILE_VER)
+CLEAN_DIRS += $(rpmbuilddir)
+
+dist:
+	@echo "Generating $(FILE_VER)";\
+	(cd $(top_srcdir); git ls-tree -r HEAD --name-only | xargs tar --transform "s,^,$(VER)/," -c) | gzip >$(FILE_VER)
+
+rpm: dist
+	$(RM) -r $(rpmbuilddir)/SOURCES
+	$(MKDIR_P) $(rpmbuilddir)/SOURCES
+	$(LN_S) -f $(abs_top_builddir)/$(FILE_VER) $(rpmbuilddir)/SOURCES
+	$(LN_S) -f $(abs_top_srcdir)/spec/build.inc $(rpmbuilddir)/SOURCES
+	$(LN_S) -f $(abs_top_srcdir)/spec/macros.inc $(rpmbuilddir)/SOURCES
+	$(LN_S) -f $(abs_top_srcdir)/spec/packages.inc $(rpmbuilddir)/SOURCES
+	DM_VER=$$(cut -d- -f1 $(top_srcdir)/VERSION_DM);\
+	GIT_VER=$$(cd $(top_srcdir); git describe | cut -d- --output-delimiter=. -f2,3 || echo 0);\
+	sed -e "s,\(device_mapper_version\) [0-9.]*$$,\1 $$DM_VER," \
+	    -e "s,^\(Version:[^0-9%]*\)[0-9.]*$$,\1 $(LVM_VER)," \
+	    -e "s,^\(Release:[^0-9%]*\)[0-9.]\+,\1 $$GIT_VER," \
+	    $(top_srcdir)/spec/source.inc >$(rpmbuilddir)/SOURCES/source.inc
+	rpmbuild -v --define "_topdir $(rpmbuilddir)" -ba $(top_srcdir)/spec/lvm2.spec
+
+generate: conf.generate
+	$(MAKE) -C conf generate
+
 install_system_dirs:
 	$(INSTALL_DIR) $(DESTDIR)$(DEFAULT_SYS_DIR)
 	$(INSTALL_ROOT_DIR) $(DESTDIR)$(DEFAULT_ARCHIVE_DIR)
@@ -122,8 +158,11 @@ endif
 install_tmpfiles_configuration:
 	$(MAKE) -C scripts install_tmpfiles_configuration

-LCOV_TRACES = libdm.info lib.info tools.info \
-	daemons/dmeventd.info daemons/clvmd.info
+LCOV_TRACES = libdm.info lib.info liblvm.info tools.info \
+	libdaemon/client.info libdaemon/server.info \
+	daemons/clvmd.info daemons/dmeventd.info \
+	daemons/lvmetad.info
+
 CLEAN_TARGETS += $(LCOV_TRACES)

 ifneq ("$(LCOV)", "")
@@ -152,7 +191,7 @@ lcov: $(LCOV_TRACES)
 	$(RM) -r $(LCOV_REPORTS_DIR)
 	$(MKDIR_P) $(LCOV_REPORTS_DIR)
 	for i in $(LCOV_TRACES); do \
-		test -s $$i && lc="$$lc $$i"; \
+		test -s $$i -a $$(wc -w <$$i) -ge 100 && lc="$$lc $$i"; \
 	done; \
 	test -z "$$lc" || $(GENHTML) -p @abs_top_builddir@ \
 		-o $(LCOV_REPORTS_DIR) $$lc
@@ -184,3 +223,13 @@ memcheck: test-programs
 ruby-test:
 	$(RUBY) report-generators/test/ts.rb
 endif
+
+ifneq ($(shell which ctags),)
+.PHONY: tags
+all: tags
+tags:
+	test -z "$(shell find $(top_srcdir) -type f -name '*.[ch]' -newer tags | head -1)" || $(RM) tags
+	test -f tags || find $(top_srcdir) -maxdepth 4 -type f -name '*.[ch]' -exec ctags -a '{}' +
+
+DISTCLEAN_TARGETS += tags
+endif
--- a/2
+++ b/2
@@ -18,7 +18,7 @@ Mailing list for general discussion related to LVM2:

 Mailing lists for LVM2 development, patches and commits:
  lvm-devel@redhat.com
-  Subscribe from https://www.redhat.com/mailman/listinfo/linux-lvm
+  Subscribe from https://www.redhat.com/mailman/listinfo/lvm-devel

  lvm2-commits@lists.fedorahosted.org (Read-only archive of commits)
  Subscribe from https://fedorahosted.org/mailman/listinfo/lvm2-commits
--- a/2
+++ b/2
@@ -1 +1 @@
-2.02.111(2)-git (2014-09-01)
+2.02.127(2)-git (2015-07-24)
--- a/2
+++ b/2
@@ -1 +1 @@
-1.02.90-git (2014-09-01)
+1.02.104-git (2015-07-24)
--- a/363
+++ b/363
@@ -1,3 +1,366 @@
+Version 2.02.127 - 
+=================================
+  Do not init filters, locking, lvmetad, lvmpolld if command doesn't use it.
+  Recognise vg/lv name format in dmsetup.
+  Fix regression in cache causing some PVs to bypass filters (2.02.105).
+
+Version 2.02.126 - 24th July 2015
+=================================
+  Fix long option hyphen removal. (2.02.122)
+  Fix clvmd freeze if client disappears without first releasing its locks.
+  Fix lvconvert segfaults while performing snapshots merge.
+  Ignore errors during detection if use_blkid_wiping=1 and --force is used.
+  Recognise DM_ABORT_ON_INTERNAL_ERRORS env var override in lvm logging fn.
+  Fix alloc segfault when extending LV with fewer stripes than in first seg.
+  Fix handling of cache policy name.
+  Set cache policy with the first lvm2 cache pool metadata commit.
+  Fix detection of thin-pool overprovisioning (2.02.124).
+  Fix lvmpolld segfaults on 32 bit architectures.
+  Add lvmlockd lock_args validation to vg_validate.
+  Fix ignored --startstopservices option if running lvmconf with systemd.
+  Hide sanlock LVs when processing LVs in VG unless named or --all used.
+
+Version 2.02.125 - 7th July 2015
+================================
+  Fix getline memory usage in lvmpolld.
+  Add support --clear-needs-check-flag for cache_check of cache pool metadata.
+  Add lvmetactl for developer use only.
+  Rename global/lock_retries to lvmlockd_retries.
+  Replace --enable-lvmlockd by --enable-lockd-sanlock and --enable-lockd-dlm.
+
+Version 2.02.124 - 3rd July 2015
+================================
+  Move sending thin pool messages from resume to suspend phase.
+  Report warning when pool is overprovisioned and not auto resized.
+  Recognize free-form date/time values for lv_time field in selection criteria.
+  Added experimental lvmlockd with configure --enable-lvmlockd.
+  Fix regression in select to match string fields if using synonyms (2.02.123).
+  Fix regression when printing more lv names via display_lvname (2.02.122).
+  Add missing error logging to unlock_vg and sync_local_dev_names callers.
+
+Version 2.02.123 - 30th June 2015
+=================================
+  Add report/time_format lvm.conf option to define time format for report.
+  Fix makefile shell compare == when building lvmetad lvmpolld (2.02.120).
+  Add --type full to lvmconfig for full configuration tree view.
+  Add undocumented environment variables to lvm man page. (2.02.119)
+  Add device synchronization point before activating a new snapshot.
+  Add --withspaces to lvmconfig to add spaces in output for better readability.
+  Add custom main function to libdaemon.
+  Use lvmetad to track out-of-date metadata discovered.
+
+Version 2.02.122 - 20th June 2015
+=================================
+  Flush stdout before printing to stderr.
+  Use pre-allocated buffer for printed LV names in display_lvname.
+  Support thins with size of external origin unaligned with thin pool chunk.
+  Allow extension of reduced thin volumes with external origins.
+  Consider snapshot and origin LV as unusable if component devices suspended.
+  Fix lvmconfig segfault on settings with undefined default value (2.02.120).
+  Add explicit 's' (shared) LV activation mode.
+  Ignore hyphens in long options names (i.e. --long-option == --longoption).
+
+Version 2.02.121 - 12th June 2015
+=================================
+  Distinguish between on-disk and lvmetad versions of text metadata.
+  Remove DL_LIBS from Makefiles for daemons that don't need them.
+  Zero errno before strtoul call in dmsetup if tested after the call.
+  Zero errno before strtoul call in lvmpolld.
+  Fix a segfault in pvscan --cache --background command.
+  Fix test for AREA_PV when checking for failed mirrors.
+  Do not use --sysinit in lvm2-activation{-early,-net}.service if lvmpolld used.
+  Maintain outdated PV info in lvmetad till all old metadata is gone from disk.
+  Do not fail polling when poll LV not found (already finished or removed).
+  Replace poll_get_copy_vg/lv fns with vg_read() and find_lv() in polldaemon.
+  Close all device fds only before sleep call in polldaemon.
+  Simplify Makefile targets that generate exported symbols.
+  Move various -D settings from Makefiles to configure.h.
+
+Version 2.02.120 - 15th May 2015
+================================
+  Make various adjustments to Makefile compilation flags.
+  Add lvmpolld debug message class.
+  Add lvmpolld client mode for querying running server instance for status info.
+  Fix some libdaemon socket creation and reuse error paths.
+  Daemons (libdaemon) support exit on idle also in non-systemd environment.
+  Provide make dist and make rpm targets
+  Configure lvm.conf for use_lvmetad and use_lvmpolld.
+  Add lvpoll for cmdline communication with lvmpolld.
+  Add lvmpolld acting as a free-standing version of polldaemon.
+  Avoid repeated identical lvmetad VG lookups in commands processing all VGs.
+  Handle switches to alternative duplicate PVs efficiently with lvmetad.
+  Properly validate PV size for pvcreate --restorefile.
+  Fix check if pvcreate wiped device (2.02.117).
+  Fix storing of vgid when caching metadata (2.02.118).
+  Fix recursive lvm-config man page. (2.02.119)
+  Refactor polldaemon interfaces to poll every operation by VG/LV couple
+  Skip wait after testing in _wait_for_single_lv when polling finished
+  Return 'None' in python for empty string properties instead of crashing.
+  Distinguish signed numerical property type in reports for lvm2app library.
+  Reread raid completion status immediately when progress appears to be zero.
+  lvm2app closes locking on lvm_quit().
+  Configure detects /run or /var/run.
+  Add missing newline in clvmd --help output.
+
+Version 2.02.119 - 2nd May 2015
+===============================
+  New LVM_LOG_FILE_EPOCH, LVM_EXPECTED_EXIT_STATUS env vars. Man page to follow.
+  Remove detailed content from lvm.conf man page: use lvmconfig instead.
+  Generate complete config files with lvmconfig or 'make generate'.
+  Also display info on deprecated config with lvmconfig --withcomments.
+  Display version since which config is deprecated in lvmconfig --withversions.
+  Add --showdeprecated to lvmconfig to also display deprecated settings.
+  Hide deprecated settings in lvmconfig output for all types but current,diff.
+  Introduce support for exit on idle feature in libdaemon
+  Add --showunsupported to lvmconfig to also display unsupported settings.
+  Display unsupported settings for lvmconfig --type current,diff only by default
+  Honour lvmconfig --ignoreunsupported and --ignoreadvanced for all --type.
+  Make python bindings usable with python3 (and compatible with 2.6 & 2.7).
+  Add lvmconfig -l|--list as shortcut for lvmconfig --type list --withsummary.
+  Add lvmconfig --type list to display plain list of configuration settings.
+  Introduce lvmconfig as the preferred form of 'lvm dumpconfig'.
+  Add lv_ancestors and lv_descendants reporting fields.
+  Add --ignorelocal option to dumpconfig to ignore the local section.
+  Close connection to lvmetad after fork.
+  Make lvchange able to resume background pvmove polling again.
+  Split pvmove update metadata fn in an initial one and a subsequent one.
+  Refactor shared pvmove and lvconvert code into new _poll files.
+  Add --unconfigured option to dumpconfig to print strings unconfigured.
+  Add --withsummary option to dumpconfig to print first line - summary comment.
+  Use number of device holders to help choose between duplicate PVs.
+  Try to make lvmetad and non-lvmetad duplicate PV handling as similar as poss.
+  Issue warnings about duplicate PVs discovered by lvmetad.
+  Track alternative devices with matching PVIDs in lvmetad.
+  Check for lvm binary in blkdeactivate and skip LVM processing if not present.
+  Add --enable-halvm and --disable-halvm options to lvmconf script.
+  Add --services, --mirrorservice and --startstopservices option to lvmconf.
+  Use proper default value of global/use_lvmetad when processing lvmconf script.
+  Respect allocation/cling_tag_list during initial contiguous allocation.
+  Add A_PARTITION_BY_TAGS set when allocated areas should not share tags.
+  Make changes persist with python addTag/removeTag.
+  Set correct vgid when updating cache when writing PV metadata.
+  More efficient clvmd singlenode locking emulation.
+  Reject lvcreate -m with raid4/5/6 to avoid unexpected layout.
+  Don't skip invalidation of cached orphans if vg write lck is held (2.02.118).
+  Log relevant PV tags when using cling allocation.
+  Add str_list_add_list() to combine two lists.
+  Fix LV processing with selection to always do the selection on initial state.
+  Add internal LV_REMOVED LV status flag.
+
+Version 2.02.118 - 23rd March 2015
+==================================
+  Store metadata size + checksum in lvmcache and add struct lvmcache_vgsummary.
+  Remove inaccessible clustered PVs from 'pvs -a'.
+  Don't invalidate cached orphan information while global lock is held.
+  Avoid rescan of all devices when requested pvscan for removed device.
+  Measure configuration timestamps with nanoseconds when available.
+  Disable lvchange of major and minor of pool LVs.
+  Fix pvscan --cache to not scan and read ignored metadata areas on PVs.
+  Add After=iscsi-shutdown.service to blk-availability.service systemd unit.
+  Disallow vgconvert from changing metadata format when lvmetad is used.
+  Don't do a full read of VG when creating a new VG with an existing name.
+  Reduce amount of VG metadata parsing when looking for vgname on a PV.
+  Avoid reparsing same metadata when reading same metadata from multiple PVs.
+  Save extra device open/close when scanning device for size.
+  Fix seg_monitor field to report status also for mirrors and thick snapshots.
+  Replace LVM_WRITE with LVM_WRITE_LOCKED flags in metadata if system ID is set.
+  Remove ACCESS_NEEDS_SYSTEM_ID VG status flag. (2.02.117)
+  Enable system ID features.
+
+Version 2.02.117 - 4th March 2015
+=================================
+  Add CFG_DISABLED for new system ID config settings that must not yet be used.
+  Preserve original format type field when processing backup files.
+  Implement status action for lvm2-monitor initscript to display monitored LVs.
+  Allow lvchange -p to change kernel state only if metadata state differs.
+  Fix incorrect persistent .cache after report with label fields only (2.02.106).
+  Reinstate PV tag recognition for pvs if reporting label fields only (2.02.105).
+  Rescan devices before vgimport with lvmetad so exported VG is seen.
+  Fix hang by adjusting cluster mirror regionsize, avoiding CPG msg limit.
+  Do not crash when --cachepolicy is given without --cachesettings.
+  Add NEEDS_FOREIGN_VGS flag to vgimport so --foreign is always supplied.
+  Add --foreign to the 6 display and reporting tools and vgcfgbackup.
+  Install /etc/lvm/lvmlocal.conf template with local section for systemid.
+  Record creation_host_system_id in lvm2 metadata (never set yet).
+  Reinstate recursive config file tag section processing. (2.02.99)
+  Add 'lvm systemid' to display the current system ID (never set yet).
+  Fix configure to properly recognize --with-default-raid10-segtype option.
+  Do not refresh filters/rescan if no signature is wiped during pvcreate.
+  Enforce none external dev info for wiping during pvcreate to avoid races.
+  Add global/system_id_source and system_id_file to lvm.conf (disabled).
+  Add support for VG system_id to control host access to VGs.
+  Update vgextend to use process_each_vg.
+  Add --ignoreskippedcluster to pvchange.
+  Allow pvchange to modify several properties at once.
+  Update pvchange to use process_each_pv.
+  Fix pvs -a used with lvmetad to filter out devices unsuitable for PVs.
+  Fix selection to recognize units for ba_start, vg_free and seg_start fields.
+  Add support for -S/--select to vgexport and vgimport.
+  Add support for -S/--select to vgdisplay, lvdisplay and pvdisplay without -C.
+  Add support for -S/--select to vgremove and lvremove.
+  Add support for -S/--select to vgchange,lvchange and pvchange.
+  Add infrastructure to support selection for non-reporting tools.
+  Add LVM_COMMAND_PROFILE env var to set default command profile name to use.
+  Set CLOEXEC flag on file descriptors originating in libdaemon.
+
+Version 2.02.116 - 30th January 2015
+====================================
+  Deactivate unused thin pools activated with lvm2 pre-2.02.112 versions.
+  Check lock holding LV when lvconverting stacked raid LV in cluster.
+  Support udev external dev info for filters: PV min size, mpath, md, partition.
+  Add fw_raid_component_detection lvm.conf option to enable FW raid detection.
+  Add devices/external_device_info_source lvm.conf option ("none" by default).
+  Scan pools in for_each_sub_lv() and add for_each_sub_lv_except_pools().
+  Fix lvm2app lvm_lv_get_property return value for fields with info/status ioctl.
+  Fix lvm2app regression in lvm_lv_get_attr causing unknown values (2.02.115).
+  Set default cache_mode to writethrough when missing in metadata.
+  Preserve chunk size with repair and metadata swap of a thin pool.
+  Fix raid --splitmirror 1 functionality (2.02.112).
+  Fix tree preload to handle splitting raid images.
+  Do not support unpartitioned DASD devices.
+  Improve config validation to check if setting with string value can be empty.
+
+Version 2.02.115 - 21st January 2015
+====================================
+  Report segment types without monitoring support as undefined.
+  Support lvchange --errorwhenfull for thin pools.
+  Improve the processing and reporting of duplicate PVs.
+  Report lv_health_status and health attribute also for thin pool.
+  Add lv_when_full reporting field.
+  Add support for lvcreate --errorwhenfull y|n for thin pools.
+  Fix lvconvert --repair to honour resilience requirement for segmented RAID LV.
+  Filter out partitioned device-mapper devices as unsuitable for use as PVs.
+  Also notify lvmetad about filtered device if using pvscan --cache DevicePath.
+  Use LVM's own selection instead of awk expressions in clvmd startup scripts.
+  Do not filter out snapshot origin LVs as unusable devices for an LVM stack.
+  Fix incorrect rimage names when converting from mirror to raid1 LV (2.02.112).
+  Introduce pvremove_many to avoid excessive metadata re-reading and messages.
+  Check for cmirror availability during cluster mirror creation and activation.
+  Add cache_policy and cache_settings reporting fields.
+  Add missing recognition for --binary option with {pv,vg,lv}display -C.
+  Fix vgimportclone to notify lvmetad about changes done if lvmetad is used.
+  Fix vgimportclone to properly override config if it is missing in lvm.conf.
+  Fix automatic use of configure --enable-udev-systemd-background-jobs.
+  Correctly rename active split LV with -splitmirrors for raid1.
+  Add report/compact_output to lvm.conf to enable/disable compact report output.
+  Still restrict mirror region size to power of 2 when VG extent size is not.
+
+Version 2.02.114 - 28th November 2014
+=====================================
+  Release socket in daemon_close and protocol string in daemon_open error path.
+  Add --cachepolicy and --cachesettings to lvcreate.
+  Fix regression when parsing /dev/mapper dir (2.02.112).
+  Fix missing rounding to 64KB when estimating optimal thin pool chunk size.
+  Fix typo in clvmd initscript causing CLVMD_STOP_TIMEOUT var to be ignored.
+  Fix size in pvresize "Resizing to ..." verbose msg to show proper result size.
+
+Version 2.02.113 - 24th November 2014
+=====================================
+  Add --cachepolicy and --cachesettings options to lvchange.
+  Validate that converted volume and specified pool volume differ in lvconvert.
+  Fix regression in vgscan --mknodes usage (2.02.112).
+  Respect --prefix when setting CLVMD_PATH configure (2.02.89).
+  Default to configure --enable-udev-systemd-background-jobs for systemd>=205.
+  Fix ignore_vg() to properly react on various vg_read errors (2.02.112).
+  Failed recovery returns FAILED_RECOVERY status flag for vg_read().
+  Exit with non-zero status code when pvck encounters a problem.
+  Fix clean_tree after activation/resume for cache target (2.02.112).
+
+Version 2.02.112 - 11th November 2014
+=====================================
+  Add cache_{read,write}_{hits,misses} reporting fields.
+  Add cache_{total,used,dirty}_blocks reporting fields.
+  Add _corig as reserved suffix.
+  Reduce number of VG writes and commits when creating spare volumes.
+  When remove_layer_from_lv() removes layer, restore subLV names.
+  Cache-pool in use becomes invisible LV.
+  Don't prompt for removal of _pmspare in VG without pool metadata LV.
+  Deactivation of snapshot origin detects and deactivates left-over snapshots.
+  Properly report error when taking snapshot of any cache type LV.
+  Add basic thread debugging messages to dmeventd.
+  Include threads being shutdown in dmeventd device registration responses.
+  Initial support for external users of thin pools based on transaction_id.
+  Report some basic percentage info for cache pools.
+  Introduce size_mb_arg_with_percent() for advanced size arg reading.
+  Add extra support for '.' as decimal point in size args.
+  Add configure parameters for default segment type choices.
+  Add global/sparse_segtype_default setting to use thin for --type sparse.
+  Update and correct lvcreate and lvconvert man pages.
+  Mark pools and snapshots as unzeroable volumes.
+  Check for zeroing of volume after segment type is fully detected.
+  Better support for persistent major and minor options with lvcreate.
+  Refactor lvcreate towards more complete validation of all supported options.
+  Support lvcreate --type linear.
+  Improve _should_wipe_lv() to warn with message.
+  Inform about temporarily created volumes only in verbose mode.
+  Better support for --test mode with pool creation.
+  Query lock holding LV when replacing and converting raid volumes.
+  Add extra validate for locked lv within validate_lv_cache_create().
+  Add internal lvseg_name() function.
+  Skip use of lock files for virtual internal VG names.
+  Fix selection on {vg,lv}_permissions fields to properly match selection criteria.
+  Fix lv_permissions reporting to display read-only{-override} instead of blank.
+  Fix liblvm2cmd and lvm shell to respect quotes around args in cmd line string.
+  Permit extent sizes > 128KB that are not power of 2 with lvm2 format.
+  Remove workaround for lvm2-monitor.service hang on stop if lvmetad stopped.
+  Change vgremove to use process_each_lv_in_vg.
+  Allow lvconvert --repair and --splitmirrors on internal LVs.
+  Introduce WARN_ flags to control some metadata warning messages.
+  Use process_each_pv in vgreduce.
+  Refactor process_each_pv in toollib.
+  Introduce single validation routine for pool chunk size.
+  Support --yes like --force in vg/lvremove to skip y|n prompt.
+  Support --yes with lvconvert --splitsnapshot.
+  Fix detection of unsupported thin external lvconversions.
+  Fix detection of unsupported cache and thin pool lvconversions.
+  Fix detection of unsupported lvconversion of cache to snapshot.
+  Improve code for creation of cache and cache pool volumes.
+  Check cluster-wide (not local) active status before removing LV.
+  Properly check if activation of removed cached LV really activated.
+  lvremove cached LV removes cachepool (keep with lvconvert --splitcache).
+  Always remove spare LV with last removed pool volume.
+  Support lvconvert --splitcache and --uncache of cached LV.
+  Option --cache has also shortcut -H (i.e. lvcreate -H).
+  Refactor lvcreate code and better preserve --type argument.
+  Refactor filter processing around lvmetad.
+  Refactor process_each_lv in toollib.
+  Refactor process_each_vg in toollib.
+  Pools cannot be used as external origin.
+  Use lv_update_and_reload() for snapshot reload.
+  Don't print message in adjusted_mirror_region_size() in activation.
+  Improve lv_update_and_reload() to find out proper lock holding LV.
+  Improve search of LV in lv_ondisk().
+  Do not scan sysfs in lv_check_not_in_use() when device is closed.
+  Backup final metadata after resync of mirror/raid.
+  Unify handling of --persistent option for lvcreate and lvchange.
+  Validate major and minor numbers stored in metadata.
+  Use -fPIE when linking -pie executables.
+  Support DEBUG_MEMLOCK to trap unsupported mmap usage.
+  Enable cache segment type by default.
+  Ensure only supported volume types are used with cache segments.
+  Fix inability to specify cachemode when 'lvconvert'ing to cache-pool.
+  Grab cluster lock for active LVs when setting clustered attribute.
+  Use va_copy to properly pass va_list through functions.
+  Add function to detect rotational devices.
+  Review internal checks for mirror/raid/pvmove volumes.
+  Track mirror segment type with separate MIRROR flag.
+  Fix cmirror endian conversions.
+  Introduce lv_is_pvmove/locked/converting/merging macros.
+  Avoid leaving linear logical volume when thin pool creation fails.
+  Don't leak alloc_handle on raid target error path.
+  Properly validate raid leg names.
+  Archive metadata before starting their modification in raid target.
+  Add missing vg_revert() in suspend_lv() raid and snapshot error path.
+  Add missing backup of lvm2 metadata after some raid modifications.
+  Use vg memory pool for extent allocation.
+  Add allocation/physical_extent_size config option for default PE size of VGs.
+  Demote an error to a warning when devices known to lvmetad are filtered out.
+  Re-order filter evaluation, making component filters global.
+  Fix logic that checks for full scan before iterating through devices.
+  Introduce common code to modify metadata and reload updated LV.
+  Fix rename of active snapshot volume in cluster.
+  Make sure shared libraries are built with RELRO option.
+
 Version 2.02.111 - 1st September 2014
 =====================================
  Pass properly sized char buffers for sscanf when initializing clvmd.
--- a/106
+++ b/106
@@ -1,3 +1,109 @@
+Version 1.02.104 -
+=================================
+  Add dmstats.8 man page
+  Add report stats sub-command to provide repeating stats reports.
+  Add clear, delete, list, and print stats sub-commands.
+  Add create stats sub-command and --start, --length, --areas and --areasize.
+  Add a 'stats' command to dmsetup to configure, manage and report stats data.
+  Add --regionid, --allregions to specify a single stats region or all regions.
+  Add --allprograms for stats commands that filter by program ID.
+  Add --auxdata and --programid arguments to set stats aux data and program ID.
+  Add statistics fields to -o <field>
+  Add libdm-stats library to allow management of device-mapper statistics.
+  Add --units to control report field output units.
+  Add support to redisplay column headings for repeating column reports.
+  Fix report header and row resource leaks.
+  Report timestamps of ioctls with dmsetup -vvv.
+  Recognize report field name variants without any underscores too.
+  Add dmsetup --interval and --count to repeat reports at specified intervals.
+  Add dm_timestamp functions to libdevmapper.
+
+Version 1.02.103 - 24th July 2015
+=================================
+  Introduce libdevmapper wrappers for all malloc-related functions.
+
+Version 1.02.102 - 7th July 2015
+================================
+  Include tool.h for default non-library use.
+  Introduce format macros with embedded % such as FMTu64.
+
+Version 1.02.101 - 3rd July 2015
+================================
+  Add experimental support to passing messages in suspend tree.
+  Add dm_report_value_cache_{set,get} to support caching during report/select.
+  Add dm_report_reserved_handler to handle report reserved value actions.
+  Support dynamic value in select: DM_REPORT_FIELD_RESERVED_VALUE_DYNAMIC_VALUE.
+  Support fuzzy names in select: DM_REPORT_FIELD_RESERVED_VALUE_FUZZY_NAMES.
+  Thin pool trace messages show a device name and major:minor.
+
+Version 1.02.100 - 30th June 2015
+=================================
+  Add since, after, until and before time operators to be used in selection.
+  Add support for time in reports and selection: DM_REPORT_FIELD_TYPE_TIME.
+  Support report reserved value ranges: DM_REPORT_FIELD_RESERVED_VALUE_RANGE.
+  Support report reserved value names: DM_REPORT_FIELD_RESERVED_VALUE_NAMED.
+  Add DM_CONFIG_VALUE_FMT_{INT_OCTAL,STRING_NO_QUOTES} config value format flag.
+  Add DM_CONFIG_VALUE_FMT_COMMON_{ARRAY,EXTRA_SPACE} config value format flag.
+  Add dm_config_value_{get,set}_format_flags to get and set config value format.
+
+Version 1.02.99 - 20th June 2015
+================================
+  New dm_tree_node_set_thin_pool_read_only(DM_1_02_99) for read-only thin pool.
+  Enhance error message when thin-pool message fails.
+  Fix dmeventd logging to avoid threaded use of static variable.
+  Remove redundant dmeventd SIGALRM code.
+
+Version 1.02.98 - 12th June 2015
+================================
+  Add dm_task_get_errno() to return any unexpected errno from a dm ioctl call.
+  Use copy of errno made after each dm ioctl call in case errno changes later.
+
+Version 1.02.97 - 15th May 2015
+===============================
+  New dm_task_get_info(DM_1_02_97) supports internal_suspend state.
+  New symbols are versioned and come with a versioned symbol name (DM_1_02_97).
+
+Version 1.02.96 - 2nd May 2015
+==============================
+  Fix selection to not match if using reserved value in criteria with >,<,>=,<=.
+  Fix selection to not match reserved values for size fields if using >,<,>=,<=.
+  Include uuid or device number in log message after ioctl failure.
+  Add DM_INTERNAL_SUSPEND_FLAG to dm-ioctl.h.
+  Install blkdeactivate script and its man page with make install_device-mapper.
+
+Version 1.02.95 - 15th March 2015
+=================================
+  Makefile regenerated.
+
+Version 1.02.94 - 4th March 2015
+================================
+  Add dm_report_object_is_selected for generalized interface for report/select.
+
+Version 1.02.93 - 21st January 2015
+===================================
+  Reduce severity of ioctl error message when dmeventd waitevent is interrupted.
+  Report 'unknown version' when incompatible version numbers were not obtained.
+  Report more info from thin pool status (out of data, metadata-ro, fail).
+  Support error_if_no_space for thin pool target.
+  Fix segfault while using selection with regex and unbuffered reporting.
+  Add dm_report_compact_fields to remove empty fields from report output.
+  Remove unimplemented dm_report_set_output_selection from libdevmapper.h.
+
+Version 1.02.92 - 24th November 2014
+====================================
+  Fix memory corruption with sorting empty string lists (1.02.86).
+  Fix man dmsetup.8 syntax warning of Groff
+  Accept unquoted strings and / in place of {} when parsing configs.
+
+Version 1.02.91 - 11th November 2014
+====================================
+  Update cache creation and dm_config_node to pass policy.
+  Allow activation of any thin-pool if transaction_id supplied is 0.
+  Don't print uninitialized stack bytes when non-root uses dm_check_version().
+  Fix selection criteria to not match reserved values when using >, <, >=, <=.
+  Add DM_LIST_HEAD_INIT macro to libdevmapper.h.
+  Fix dm_is_dm_major to not issue error about missing /proc lines for dm module.
+
 Version 1.02.90 - 1st September 2014
 ====================================
  Restore proper buffer size for parsing mountinfo line (1.02.89)
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -37,6 +37,10 @@ AC_DEFUN([AC_TRY_CCFLAG],
    fi
 ])

+dnl AC_IF_YES([TEST-FOR-YES], [ACTION-IF-TRUE], [ACTION-IF-FALSE])
+dnl AS_IF() abstraction, checks shell variable for 'yes'
+AC_DEFUN([AC_IF_YES], [AS_IF([test $$1 = yes], [$2], [$3])])
+
 dnl AC_TRY_LDFLAGS([LDFLAGS], [VAR], [ACTION-IF-WORKS], [ACTION-IF-FAILS])
 dnl check if $CC supports given ld flags

--- a/aclocal.m4
+++ b/aclocal.m4
@@ -1,6 +1,6 @@
-# generated automatically by aclocal 1.13.4 -*- Autoconf -*-
+# generated automatically by aclocal 1.15 -*- Autoconf -*-

-# Copyright (C) 1996-2013 Free Software Foundation, Inc.
+# Copyright (C) 1996-2014 Free Software Foundation, Inc.

 # This file is free software; the Free Software Foundation
 # gives unlimited permission to copy and/or distribute it,
--- a/conf/.gitignore
+++ b/conf/.gitignore
@@ -0,0 +1,4 @@
+command_profile_template.profile
+example.conf
+lvmlocal.conf
+metadata_profile_template.profile
--- a/conf/Makefile.in
+++ b/conf/Makefile.in
@@ -17,24 +17,37 @@ top_builddir = @top_builddir@

 CONFSRC=example.conf
 CONFDEST=lvm.conf
+CONFLOCAL=lvmlocal.conf

 PROFILE_TEMPLATES=command_profile_template.profile metadata_profile_template.profile
 PROFILES=$(PROFILE_TEMPLATES) $(srcdir)/thin-generic.profile $(srcdir)/thin-performance.profile

 include $(top_builddir)/make.tmpl

+.PHONY: install_conf install_localconf install_profiles
+
+generate:
+	(cat $(top_srcdir)/conf/example.conf.base && LD_LIBRARY_PATH=$(top_builddir)/libdm:$(LD_LIBRARY_PATH) $(top_builddir)/tools/lvm dumpconfig --type default --unconfigured --withcomments --ignorelocal --withspaces) > example.conf.in
+	(cat $(top_srcdir)/conf/lvmlocal.conf.base && LD_LIBRARY_PATH=$(top_builddir)/libdm:$(LD_LIBRARY_PATH) $(top_builddir)/tools/lvm dumpconfig --type default --unconfigured --withcomments --withspaces local) > lvmlocal.conf.in
+
 install_conf: $(CONFSRC)
 	@if [ ! -e $(confdir)/$(CONFDEST) ]; then \
 		echo "$(INSTALL_WDATA) -D $< $(confdir)/$(CONFDEST)"; \
 		$(INSTALL_WDATA) -D $< $(confdir)/$(CONFDEST); \
 	fi

+install_localconf: $(CONFLOCAL)
+	@if [ ! -e $(confdir)/$(CONFLOCAL) ]; then \
+		echo "$(INSTALL_WDATA) -D $< $(confdir)/$(CONFLOCAL)"; \
+		$(INSTALL_WDATA) -D $< $(confdir)/$(CONFLOCAL); \
+	fi
+
 install_profiles: $(PROFILES)
 	$(INSTALL_DIR) $(DESTDIR)$(DEFAULT_PROFILE_DIR)
 	$(INSTALL_DATA) $(PROFILES) $(DESTDIR)$(DEFAULT_PROFILE_DIR)/

-install_lvm2: install_conf install_profiles
+install_lvm2: install_conf install_localconf install_profiles

 install: install_lvm2

-DISTCLEAN_TARGETS += $(CONFSRC) $(PROFILE_TEMPLATES)
+DISTCLEAN_TARGETS += $(CONFSRC) $(CONFLOCAL) $(PROFILE_TEMPLATES)
--- a/conf/command_profile_template.profile.in
+++ b/conf/command_profile_template.profile.in
@@ -18,6 +18,7 @@ global {
 	lvdisplay_shows_full_device_path=0
 }
 report {
+	compact_output=0
 	aligned=1
 	buffered=1
 	headings=1
--- a/conf/example.conf.base
+++ b/conf/example.conf.base
@@ -0,0 +1,23 @@
+# This is an example configuration file for the LVM2 system.
+# It contains the default settings that would be used if there was no
+# @DEFAULT_SYS_DIR@/lvm.conf file.
+#
+# Refer to 'man lvm.conf' for further information including the file layout.
+#
+# Refer to 'man lvm.conf' for information about how settings configured in
+# this file are combined with built-in values and command line options to
+# arrive at the final values used by LVM.
+#
+# Refer to 'man lvmconfig' for information about displaying the built-in
+# and configured values used by LVM.
+#
+# If a default value is set in this file (not commented out), then a
+# new version of LVM using this file will continue using that value,
+# even if the new version of LVM changes the built-in default value.
+#
+# To put this file in a different directory and override @DEFAULT_SYS_DIR@ set
+# the environment variable LVM_SYSTEM_DIR before running the tools.
+#
+# N.B. Take care that each setting only appears once if uncommenting
+# example settings in this file.
+
--- a/conf/example.conf.in
+++ b/conf/example.conf.in
--- a/conf/lvmlocal.conf.base
+++ b/conf/lvmlocal.conf.base
@@ -0,0 +1,19 @@
+# This is a local configuration file template for the LVM2 system
+# which should be installed as @DEFAULT_SYS_DIR@/lvmlocal.conf .
+#
+# Refer to 'man lvm.conf' for information about the file layout.
+#
+# To put this file in a different directory and override
+# @DEFAULT_SYS_DIR@ set the environment variable LVM_SYSTEM_DIR before
+# running the tools.
+#
+# The lvmlocal.conf file is normally expected to contain only the
+# "local" section which contains settings that should not be shared or
+# repeated among different hosts.  (But if other sections are present,
+# they *will* get processed.  Settings in this file override equivalent
+# ones in lvm.conf and are in turn overridden by ones in any enabled
+# lvm_<tag>.conf files.)
+#
+# Please take care that each setting only appears once if uncommenting
+# example settings in this file and never copy this file between hosts.
+
--- a/conf/lvmlocal.conf.in
+++ b/conf/lvmlocal.conf.in
@@ -0,0 +1,57 @@
+# This is a local configuration file template for the LVM2 system
+# which should be installed as @DEFAULT_SYS_DIR@/lvmlocal.conf .
+#
+# Refer to 'man lvm.conf' for information about the file layout.
+#
+# To put this file in a different directory and override
+# @DEFAULT_SYS_DIR@ set the environment variable LVM_SYSTEM_DIR before
+# running the tools.
+#
+# The lvmlocal.conf file is normally expected to contain only the
+# "local" section which contains settings that should not be shared or
+# repeated among different hosts.  (But if other sections are present,
+# they *will* get processed.  Settings in this file override equivalent
+# ones in lvm.conf and are in turn overridden by ones in any enabled
+# lvm_<tag>.conf files.)
+#
+# Please take care that each setting only appears once if uncommenting
+# example settings in this file and never copy this file between hosts.
+
+
+# Configuration section local.
+# LVM settings that are specific to the local host.
+local {
+
+	# Configuration option local/system_id.
+	# Defines the local system ID for lvmlocal mode.
+	# This is used when global/system_id_source is set
+	# to 'lvmlocal' in the main configuration file,
+	# e.g. lvm.conf.
+	# When used, it must be set to a unique value
+	# among all hosts sharing access to the storage,
+	# e.g. a host name.
+	# Example:
+	# Set no system ID.
+	# system_id = ""
+	# Example:
+	# Set the system_id to the string 'host1'.
+	# system_id = "host1"
+	# system_id = ""
+
+	# Configuration option local/extra_system_ids.
+	# A list of extra VG system IDs the local host can access.
+	# VGs with the system IDs listed here (in addition
+	# to the host's own system ID) can be fully accessed
+	# by the local host.  (These are system IDs that the
+	# host sees in VGs, not system IDs that identify the
+	# local host, which is determined by system_id_source.)
+	# Use this only after consulting 'man lvmsystemid'
+	# to be certain of correct usage and possible dangers.
+	# This configuration option does not have a default value defined.
+
+	# Configuration option local/host_id.
+	# The lvmlockd sanlock host_id.
+	# This must be unique among all hosts,
+	# and must be between 1 and 2000.
+	# host_id = 0
+}
--- a/conf/metadata_profile_template.profile.in
+++ b/conf/metadata_profile_template.profile.in
@@ -16,7 +16,7 @@ allocation {
 	thin_pool_zero=1
 	thin_pool_discards="passdown"
 	thin_pool_chunk_size_policy="generic"
-#	thin_pool_chunk_size=64
+#	thin_pool_chunk_size=128
 }
 activation {
 	thin_pool_autoextend_threshold=100
--- a/2049
+++ b/2049
--- a/configure.in
+++ b/configure.in
--- a/daemons/Makefile.in
+++ b/daemons/Makefile.in
@@ -1,5 +1,5 @@
 #
-# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
+# Copyright (C) 2004-2015 Red Hat, Inc. All rights reserved.
 #
 # This file is part of LVM2.
 #
@@ -15,7 +15,7 @@ srcdir = @srcdir@
 top_srcdir = @top_srcdir@
 top_builddir = @top_builddir@

-.PHONY: dmeventd clvmd cmirrord lvmetad
+.PHONY: dmeventd clvmd cmirrord lvmetad lvmpolld lvmlockd

 ifneq ("@CLVMD@", "none")
  SUBDIRS += clvmd
@@ -36,8 +36,16 @@ ifeq ("@BUILD_LVMETAD@", "yes")
  SUBDIRS += lvmetad
 endif

+ifeq ("@BUILD_LVMPOLLD@", "yes")
+  SUBDIRS += lvmpolld
+endif
+
+ifeq ("@BUILD_LVMLOCKD@", "yes")
+  SUBDIRS += lvmlockd 
+endif
+
 ifeq ($(MAKECMDGOALS),distclean)
-  SUBDIRS = clvmd cmirrord dmeventd lvmetad
+  SUBDIRS = clvmd cmirrord dmeventd lvmetad lvmpolld lvmlockd
 endif

 include $(top_builddir)/make.tmpl
--- a/daemons/clvmd/.gitignore
+++ b/daemons/clvmd/.gitignore
@@ -0,0 +1 @@
+clvmd
--- a/daemons/clvmd/Makefile.in
+++ b/daemons/clvmd/Makefile.in
@@ -36,10 +36,6 @@ SOURCES = \
 	lvm-functions.c  \
 	refresh_clvmd.c

-ifeq ("@DEBUG@", "yes")
-	DEFS += -DDEBUG
-endif
-
 ifneq (,$(findstring cman,, "@CLVMD@,"))
 	SOURCES += clvmd-cman.c
 	LMLIBS += $(CMAN_LIBS) $(CONFDB_LIBS) $(DLM_LIBS)
--- a/daemons/clvmd/clvmd-command.c
+++ b/daemons/clvmd/clvmd-command.c
@@ -323,6 +323,7 @@ void cmd_client_cleanup(struct local_client *client)
 	int lkid;
 	char *lockname;

+	DEBUGLOG("Client thread cleanup (%p)\n", client);
 	if (!client->bits.localsock.private)
 		return;

@@ -331,7 +332,7 @@ void cmd_client_cleanup(struct local_client *client)
 	dm_hash_iterate(v, lock_hash) {
 		lkid = (int)(long)dm_hash_get_data(lock_hash, v);
 		lockname = dm_hash_get_key(lock_hash, v);
-		DEBUGLOG("cleanup: Unlocking lock %s %x\n", lockname, lkid);
+		DEBUGLOG("Cleanup (%p): Unlocking lock %s %x\n", client, lockname, lkid);
 		(void) sync_unlock(lockname, lkid);
 	}

@@ -339,7 +340,6 @@ void cmd_client_cleanup(struct local_client *client)
 	client->bits.localsock.private = NULL;
 }

-
 static int restart_clvmd(void)
 {
 	const char **argv;
--- a/daemons/clvmd/clvmd-common.h
+++ b/daemons/clvmd/clvmd-common.h
@@ -18,15 +18,10 @@
 #ifndef _LVM_CLVMD_COMMON_H
 #define _LVM_CLVMD_COMMON_H

-#include "configure.h"
-
 #define _REENTRANT
-#define _GNU_SOURCE
-#define _FILE_OFFSET_BITS 64

-#include "libdevmapper.h"
+#include "tool.h"
+
 #include "lvm-logging.h"

-#include <unistd.h>
-
 #endif
--- a/daemons/clvmd/clvmd-openais.c
+++ b/daemons/clvmd/clvmd-openais.c
@@ -243,7 +243,7 @@ static void openais_cpg_confchg_callback(cpg_handle_t handle,
 	struct node_info *ninfo;

 	DEBUGLOG("confchg callback. %" PRIsize_t " joined, "
-		 "%" PRIsize_t " left, %" PRIsize_t " members\n",
+		 FMTsize_t " left, %" PRIsize_t " members\n",
 		 joined_list_entries, left_list_entries, member_list_entries);

 	for (i=0; i<joined_list_entries; i++) {
--- a/daemons/clvmd/clvmd-singlenode.c
+++ b/daemons/clvmd/clvmd-singlenode.c
@@ -208,8 +208,6 @@ static int _lock_resource(const char *resource, int mode, int flags, int *lockid
 	pthread_mutex_lock(&_lock_mutex);

 retry:
-	pthread_cond_broadcast(&_lock_cond); /* to wakeup waiters */
-
 	if (!(head = dm_hash_lookup(_locks, resource))) {
 		if (flags & LCKF_CONVERT) {
 			/* In real DLM, lock is identified only by lockid, resource is not used */
@@ -269,12 +267,14 @@ retry:
 		dm_list_add(head, &lck->list);
 	}
 out:
+	pthread_cond_broadcast(&_lock_cond); /* to wakeup waiters */
 	pthread_mutex_unlock(&_lock_mutex);
 	DEBUGLOG("Locked resource %s, lockid=%d, mode=%s\n",
 		 resource, lck->lockid, _get_mode(lck->mode));

 	return 0;
 bad:
+	pthread_cond_broadcast(&_lock_cond); /* to wakeup waiters */
 	pthread_mutex_unlock(&_lock_mutex);
 	DEBUGLOG("Failed to lock resource %s\n", resource);

--- a/daemons/clvmd/clvmd.c
+++ b/daemons/clvmd/clvmd.c
@@ -153,16 +153,11 @@ static if_type_t get_cluster_type(void);
 static void usage(const char *prog, FILE *file)
 {
 	fprintf(file, "Usage: %s [options]\n"
-		"   -V       Show version of clvmd\n"
-		"   -h       Show this help information\n"
-		"   -d[n]    Set debug logging (0:none, 1:stderr (implies -f option), 2:syslog)\n"
-		"   -f       Don't fork, run in the foreground\n"
-		"   -E<lockuuid> Take this lock uuid as exclusively locked resource (for restart)\n"
-		"   -R       Tell all running clvmds in the cluster to reload their device cache\n"
-		"   -S       Restart clvmd, preserving exclusive locks\n"
 		"   -C       Sets debug level (from -d) on all clvmd instances clusterwide\n"
-		"   -t<secs> Command timeout (default 60 seconds)\n"
-		"   -T<secs> Startup timeout (default none)\n"
+		"   -d[n]    Set debug logging (0:none, 1:stderr (implies -f option), 2:syslog)\n"
+		"   -E<uuid> Take this lock uuid as exclusively locked resource (for restart)\n"
+		"   -f       Don't fork, run in the foreground\n"
+		"   -h       Show this help information\n"
 		"   -I<cmgr> Cluster manager (default: auto)\n"
 		"            Available cluster managers: "
 #ifdef USE_COROSYNC
@@ -177,6 +172,12 @@ static void usage(const char *prog, FILE *file)
 #ifdef USE_SINGLENODE
 		"singlenode "
 #endif
+		"\n"
+		"   -R       Tell all running clvmds in the cluster to reload their device cache\n"
+		"   -S       Restart clvmd, preserving exclusive locks\n"
+		"   -t<secs> Command timeout (default: 60 seconds)\n"
+		"   -T<secs> Startup timeout (default:  0 seconds)\n"
+		"   -V       Show version of clvmd\n"
 		"\n", prog);
 }

@@ -222,6 +223,7 @@ void debuglog(const char *fmt, ...)
 		fprintf(stderr, "CLVMD[%x]: %.15s ", (int)pthread_self(), ctime_r(&P, buf_ctime) + 4);
 		vfprintf(stderr, fmt, ap);
 		va_end(ap);
+		fflush(stderr);
 		break;
 	case DEBUG_SYSLOG:
 		if (!syslog_init) {
@@ -597,7 +599,9 @@ int main(int argc, char *argv[])

 	/* This needs to be started after cluster initialisation
 	   as it may need to take out locks */
-	DEBUGLOG("starting LVM thread\n");
+	DEBUGLOG("Starting LVM thread\n");
+	DEBUGLOG("Main cluster socket fd %d (%p) with local socket %d (%p)\n",
+		 local_client_head.fd, &local_client_head, newfd->fd, newfd);

 	/* Don't let anyone else to do work until we are started */
 	pthread_create(&lvm_thread, &stack_attr, lvm_thread_fn, &lvm_params);
@@ -697,7 +701,7 @@ static int local_rendezvous_callback(struct local_client *thisfd, char *buf,
 		newfd->type = LOCAL_SOCK;
 		newfd->callback = local_sock_callback;
 		newfd->bits.localsock.all_success = 1;
-		DEBUGLOG("Got new connection on fd %d\n", newfd->fd);
+		DEBUGLOG("Got new connection on fd %d (%p)\n", newfd->fd, newfd);
 		*new_client = newfd;
 	}
 	return 1;
@@ -849,18 +853,48 @@ static void main_loop(int cmd_timeout)
 		struct local_client *thisfd;
 		struct timeval tv = { cmd_timeout, 0 };
 		int quorate = clops->is_quorate();
+		int client_count = 0;
+		int max_fd = 0;

 		/* Wait on the cluster FD and all local sockets/pipes */
 		local_client_head.fd = clops->get_main_cluster_fd();
 		FD_ZERO(&in);
+		struct local_client *lastfd = &local_client_head;
+		struct local_client *nextfd = local_client_head.next;
+
 		for (thisfd = &local_client_head; thisfd; thisfd = thisfd->next) {
+			client_count++;
+			max_fd = max(max_fd, thisfd->fd);
+		}
+
+		if (max_fd > FD_SETSIZE - 32) {
+			fprintf(stderr, "WARNING: There are too many connections to clvmd.  Investigate and take action now!\n");
+ 			fprintf(stderr, "WARNING: Your cluster may freeze up if the number of clvmd file descriptors (%d) exceeds %d.\n", max_fd + 1, FD_SETSIZE);
+		}
+
+		for (thisfd = &local_client_head; thisfd; thisfd = nextfd, nextfd = thisfd ? thisfd->next : NULL) {
+
+			if (thisfd->removeme && !cleanup_zombie(thisfd)) {
+				struct local_client *free_fd = thisfd;
+				lastfd->next = nextfd;
+				DEBUGLOG("removeme set for %p with %d monitored fds remaining\n", free_fd, client_count - 1);
+
+				/* Queue cleanup, this also frees the client struct */
+				add_to_lvmqueue(free_fd, NULL, 0, NULL);
+
+				continue;
+			}
+
+			lastfd = thisfd;
+
 			if (thisfd->removeme)
 				continue;

 			/* if the cluster is not quorate then don't listen for new requests */
 			if ((thisfd->type != LOCAL_RENDEZVOUS &&
 			     thisfd->type != LOCAL_SOCK) || quorate)
-				FD_SET(thisfd->fd, &in);
+				if (thisfd->fd < FD_SETSIZE)
+					FD_SET(thisfd->fd, &in);
 		}

 		select_status = select(FD_SETSIZE, &in, NULL, NULL, &tv);
@@ -876,31 +910,22 @@ static void main_loop(int cmd_timeout)
 		}

 		if (select_status > 0) {
-			struct local_client *lastfd = NULL;
 			char csid[MAX_CSID_LEN];
 			char buf[max_cluster_message];

 			for (thisfd = &local_client_head; thisfd; thisfd = thisfd->next) {
-				if (thisfd->removeme && !cleanup_zombie(thisfd)) {
-					struct local_client *free_fd = thisfd;
-					lastfd->next = thisfd->next;
-					DEBUGLOG("removeme set for fd %d\n", free_fd->fd);
-
-					/* Queue cleanup, this also frees the client struct */
-					add_to_lvmqueue(free_fd, NULL, 0, NULL);
-					break;
-				}
-
-				if (FD_ISSET(thisfd->fd, &in)) {
+				if (thisfd->fd < FD_SETSIZE && FD_ISSET(thisfd->fd, &in)) {
 					struct local_client *newfd = NULL;
 					int ret;

+					/* FIXME Remove from main thread in case it blocks! */
 					/* Do callback */
 					ret = thisfd->callback(thisfd, buf, sizeof(buf),
 							       csid, &newfd);
 					/* Ignore EAGAIN */
-					if (ret < 0 && (errno == EAGAIN || errno == EINTR))
+					if (ret < 0 && (errno == EAGAIN || errno == EINTR)) {
 						continue;
+                                        }

 					/* Got error or EOF: Remove it from the list safely */
 					if (ret <= 0) {
@@ -914,17 +939,16 @@ static void main_loop(int cmd_timeout)
 						DEBUGLOG("ret == %d, errno = %d. removing client\n",
 							 ret, errno);
 						thisfd->removeme = 1;
-						break;
+						continue;
 					}

 					/* New client...simply add it to the list */
 					if (newfd) {
 						newfd->next = thisfd->next;
 						thisfd->next = newfd;
-						break;
+						thisfd = newfd;
 					}
 				}
-				lastfd = thisfd;
 			}
 		}

@@ -1128,7 +1152,7 @@ static void dump_message(char *buf, int len)
 		row[j] = buf[i];
 		str[j] = (isprint(buf[i])) ? buf[i] : ' ';

-		if ((j == 8) || (i + 1 == len)) {
+		if (i + 1 == len) {
 			for (;j < 8; ++j) {
 				row[j] = 0;
 				str[j] = ' ';
@@ -1417,7 +1441,7 @@ static int read_from_local_sock(struct local_client *thisfd)
 	thisfd->bits.localsock.in_progress = TRUE;
 	thisfd->bits.localsock.state = PRE_COMMAND;
 	thisfd->bits.localsock.cleanup_needed = 1;
-	DEBUGLOG("Creating pre&post thread\n");
+	DEBUGLOG("Creating pre&post thread for pipe fd %d (%p)\n", newfd->fd, newfd);
 	status = pthread_create(&thisfd->bits.localsock.threadid,
 				&stack_attr, pre_and_post_thread, thisfd);
 	DEBUGLOG("Created pre&post thread, state = %d\n", status);
@@ -1671,7 +1695,7 @@ static __attribute__ ((noreturn)) void *pre_and_post_thread(void *arg)
 	sigset_t ss;
 	int pipe_fd = client->bits.localsock.pipe;

-	DEBUGLOG("Pre&post thread (%p), pipe %d\n", client, pipe_fd);
+	DEBUGLOG("Pre&post thread (%p), pipe fd %d\n", client, pipe_fd);
 	pthread_mutex_lock(&client->bits.localsock.mutex);

 	/* Ignore SIGUSR1 (handled by master process) but enable
@@ -1691,7 +1715,7 @@ static __attribute__ ((noreturn)) void *pre_and_post_thread(void *arg)
 		if ((status = do_pre_command(client)))
 			client->bits.localsock.all_success = 0;

-		DEBUGLOG("Pre&post thread (%p) writes status %d down to pipe %d\n",
+		DEBUGLOG("Pre&post thread (%p) writes status %d down to pipe fd %d\n",
 			 client, status, pipe_fd);

 		/* Tell the parent process we have finished this bit */
@@ -1973,7 +1997,7 @@ static int process_work_item(struct lvm_thread_cmd *cmd)
 {
 	/* If msg is NULL then this is a cleanup request */
 	if (cmd->msg == NULL) {
-		DEBUGLOG("process_work_item: free fd %d\n", cmd->client->fd);
+		DEBUGLOG("process_work_item: free %p\n", cmd->client);
 		cmd_client_cleanup(cmd->client);
 		pthread_mutex_destroy(&cmd->client->bits.localsock.mutex);
 		pthread_cond_destroy(&cmd->client->bits.localsock.cond);
--- a/daemons/clvmd/lvm-functions.c
+++ b/daemons/clvmd/lvm-functions.c
@@ -136,7 +136,7 @@ static const char *decode_flags(unsigned char flags)
 		flags & LCK_DMEVENTD_MONITOR_MODE ? "DMEVENTD_MONITOR|" : "",
 		flags & LCK_ORIGIN_ONLY_MODE ? "ORIGIN_ONLY|" : "",
 		flags & LCK_TEST_MODE ? "TEST|" : "",
-		flags & LCK_CONVERT ? "CONVERT|" : "",
+		flags & LCK_CONVERT_MODE ? "CONVERT|" : "",
 		flags & LCK_DMEVENTD_MONITOR_IGNORE ? "DMEVENTD_MONITOR_IGNORE|" : "",
 		flags & LCK_REVERT_MODE ? "REVERT|" : "");

@@ -375,7 +375,7 @@ static int do_activate_lv(char *resource, unsigned char command, unsigned char l
 	 * of exclusive lock to shared one during activation.
 	 */
 	if (!test_mode() && command & LCK_CLUSTER_VG) {
-		status = hold_lock(resource, mode, LCKF_NOQUEUE | (lock_flags & LCK_CONVERT ? LCKF_CONVERT:0));
+		status = hold_lock(resource, mode, LCKF_NOQUEUE | ((lock_flags & LCK_CONVERT_MODE) ? LCKF_CONVERT:0));
 		if (status) {
 			/* Return an LVM-sensible error for this.
 			 * Forcing EIO makes the upper level return this text
@@ -510,7 +510,7 @@ int do_lock_lv(unsigned char command, unsigned char lock_flags, char *resource)
 	DEBUGLOG("do_lock_lv: resource '%s', cmd = %s, flags = %s, critical_section = %d\n",
 		 resource, decode_locking_cmd(command), decode_flags(lock_flags), critical_section());

-	if (!cmd->config_initialized || config_files_changed(cmd)) {
+	if (!cmd->initialized.config || config_files_changed(cmd)) {
 		/* Reinitialise various settings inc. logging, filters */
 		if (do_refresh_cache()) {
 			log_error("Updated config file invalid. Aborting.");
@@ -842,7 +842,7 @@ void lvm_do_backup(const char *vgname)

 	pthread_mutex_lock(&lvm_lock);

-	vg = vg_read_internal(cmd, vgname, NULL /*vgid*/, 1, &consistent);
+	vg = vg_read_internal(cmd, vgname, NULL /*vgid*/, WARN_PV_READ, &consistent);

 	if (vg && consistent)
 		check_current_backup(vg);
@@ -899,7 +899,7 @@ int init_clvm(struct dm_hash_table *excl_uuid)
 	if (!get_initial_state(excl_uuid))
 		log_error("Cannot load initial lock states.");

-	if (!(cmd = create_toolcontext(1, NULL, 0, 1))) {
+	if (!(cmd = create_toolcontext(1, NULL, 0, 1, 1, 1))) {
 		log_error("Failed to allocate command context");
 		return 0;
 	}
--- a/daemons/cmirrord/.gitignore
+++ b/daemons/cmirrord/.gitignore
@@ -0,0 +1 @@
+cmirrord
--- a/daemons/cmirrord/compat.c
+++ b/daemons/cmirrord/compat.c
@@ -126,13 +126,14 @@ static int v5_endian_to_network(struct clog_request *rq)

 	u_rq->error = xlate32(u_rq->error);
 	u_rq->seq = xlate32(u_rq->seq);
-	u_rq->request_type = xlate32(u_rq->request_type);
-	u_rq->data_size = xlate64(u_rq->data_size);

 	rq->originator = xlate32(rq->originator);

 	v5_data_endian_switch(rq, 1);

+	u_rq->request_type = xlate32(u_rq->request_type);
+	u_rq->data_size = xlate32(u_rq->data_size);
+
 	return size;
 }

@@ -167,7 +168,7 @@ static int v5_endian_from_network(struct clog_request *rq)
 	u_rq->error = xlate32(u_rq->error);
 	u_rq->seq = xlate32(u_rq->seq);
 	u_rq->request_type = xlate32(u_rq->request_type);
-	u_rq->data_size = xlate64(u_rq->data_size);
+	u_rq->data_size = xlate32(u_rq->data_size);

 	rq->originator = xlate32(rq->originator);

@@ -182,12 +183,11 @@ int clog_request_from_network(void *data, size_t data_len)
 {
 	uint64_t *vp = data;
 	uint64_t version = xlate64(vp[0]);
-	uint64_t unconverted_version = vp[1];
 	struct clog_request *rq = data;

 	switch (version) {
 	case 5: /* Upstream */
-		if (version == unconverted_version)
+		if (version == vp[0])
 			return 0;
 		break;
 	case 4: /* RHEL 5.[45] */
--- a/daemons/dmeventd/.gitignore
+++ b/daemons/dmeventd/.gitignore
@@ -0,0 +1 @@
+dmeventd
--- a/daemons/dmeventd/dmeventd.c
+++ b/daemons/dmeventd/dmeventd.c
@@ -16,26 +16,21 @@
 * dmeventd - dm event daemon to monitor active mapped devices
 */

-#define _GNU_SOURCE
-#define _FILE_OFFSET_BITS 64
+#include "tool.h"

-#include "configure.h"
-#include "libdevmapper.h"
-#include "libdevmapper-event.h"
-#include "dmeventd.h"
 //#include "libmultilog.h"
 #include "dm-logging.h"

-#include <stdarg.h>
+#include "libdevmapper-event.h"
+#include "dmeventd.h"
+
 #include <dlfcn.h>
-#include <errno.h>
 #include <pthread.h>
 #include <sys/file.h>
 #include <sys/stat.h>
 #include <sys/wait.h>
 #include <sys/time.h>
 #include <sys/resource.h>
-#include <unistd.h>
 #include <signal.h>
 #include <arpa/inet.h>		/* for htonl, ntohl */
 #include <fcntl.h>		/* for musl libc */
@@ -117,6 +112,42 @@ static int _foreground = 0;
 static int _restart = 0;
 static char **_initial_registrations = 0;

+/* FIXME Make configurable at runtime */
+#ifdef DEBUG
+#  define DEBUGLOG(fmt, args...) debuglog("[Thr %x]: " fmt, (int)pthread_self(), ## args)
+void debuglog(const char *fmt, ... ) __attribute__ ((format(printf, 1, 2)));
+
+void debuglog(const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vsyslog(LOG_DEBUG, fmt, ap);
+	va_end(ap);
+}
+
+static const char *decode_cmd(uint32_t cmd)
+{
+	switch (cmd) {
+	case DM_EVENT_CMD_ACTIVE:			return "ACTIVE";
+	case DM_EVENT_CMD_REGISTER_FOR_EVENT:		return "REGISTER_FOR_EVENT";
+	case DM_EVENT_CMD_UNREGISTER_FOR_EVENT:		return "UNREGISTER_FOR_EVENT";
+	case DM_EVENT_CMD_GET_REGISTERED_DEVICE:	return "GET_REGISTERED_DEVICE";
+	case DM_EVENT_CMD_GET_NEXT_REGISTERED_DEVICE:	return "GET_NEXT_REGISTERED_DEVICE";
+	case DM_EVENT_CMD_SET_TIMEOUT:			return "SET_TIMEOUT";
+	case DM_EVENT_CMD_GET_TIMEOUT:			return "GET_TIMEOUT";
+	case DM_EVENT_CMD_HELLO:			return "HELLO";
+	case DM_EVENT_CMD_DIE:				return "DIE";
+	case DM_EVENT_CMD_GET_STATUS:			return "GET_STATUS";
+	case DM_EVENT_CMD_GET_PARAMETERS:		return "GET_PARAMETERS";
+	default:					return "unknown";
+	}
+}
+
+#else
+#  define DEBUGLOG(fmt, args...) do { } while (0)
+#endif
+
 /* Data kept about a DSO. */
 struct dso_data {
 	struct dm_list list;
@@ -538,6 +569,7 @@ static void *_timeout_thread(void *unused __attribute__((unused)))
 	struct timespec timeout;
 	time_t curr_time;

+	DEBUGLOG("Timeout thread starting.");
 	timeout.tv_nsec = 0;
 	pthread_cleanup_push(_exit_timeout, NULL);
 	pthread_mutex_lock(&_timeout_mutex);
@@ -549,6 +581,7 @@ static void *_timeout_thread(void *unused __attribute__((unused)))
 		dm_list_iterate_items_gen(thread, &_timeout_registry, timeout_list) {
 			if (thread->next_time <= curr_time) {
 				thread->next_time = curr_time + thread->timeout;
+				DEBUGLOG("Sending SIGALRM to Thr %x for timeout.", (int) thread->thread);
 				pthread_kill(thread->thread, SIGALRM);
 			}

@@ -560,6 +593,7 @@ static void *_timeout_thread(void *unused __attribute__((unused)))
 				       &timeout);
 	}

+	DEBUGLOG("Timeout thread finished.");
 	pthread_cleanup_pop(1);

 	return NULL;
@@ -640,9 +674,11 @@ static int _event_wait(struct thread_status *thread, struct dm_task **task)
 	int ret = DM_WAIT_RETRY;
 	struct dm_task *dmt;
 	struct dm_info info;
+	int ioctl_errno;

 	*task = 0;

+	DEBUGLOG("Preparing waitevent task for %s", thread->device.uuid);
 	if (!(dmt = dm_task_create(DM_DEVICE_WAITEVENT)))
 		return DM_WAIT_RETRY;

@@ -661,32 +697,37 @@ static int _event_wait(struct thread_status *thread, struct dm_task **task)
 	if (!_in_event_counter++)
 		dm_log_init(_no_intr_log);
 	_unlock_mutex();
+
+	DEBUGLOG("Starting waitevent task for %s", thread->device.uuid);
 	/*
 	 * This is so that you can break out of waiting on an event,
 	 * either for a timeout event, or to cancel the thread.
 	 */
 	set = _unblock_sigalrm();
-	errno = 0;
 	if (dm_task_run(dmt)) {
 		thread->current_events |= DM_EVENT_DEVICE_ERROR;
 		ret = DM_WAIT_INTR;

 		if ((ret = dm_task_get_info(dmt, &info)))
 			thread->event_nr = info.event_nr;
-	} else if (thread->events & DM_EVENT_TIMEOUT && errno == EINTR) {
-		thread->current_events |= DM_EVENT_TIMEOUT;
-		ret = DM_WAIT_INTR;
-	} else if (thread->status == DM_THREAD_SHUTDOWN && errno == EINTR) {
-		ret = DM_WAIT_FATAL;
 	} else {
-		syslog(LOG_NOTICE, "dm_task_run failed, errno = %d, %s",
-		       errno, strerror(errno));
-		if (errno == ENXIO) {
-			syslog(LOG_ERR, "%s disappeared, detaching",
-			       thread->device.name);
+		ioctl_errno = dm_task_get_errno(dmt);
+		if (thread->events & DM_EVENT_TIMEOUT && ioctl_errno == EINTR) {
+			thread->current_events |= DM_EVENT_TIMEOUT;
+			ret = DM_WAIT_INTR;
+		} else if (thread->status == DM_THREAD_SHUTDOWN && ioctl_errno == EINTR)
 			ret = DM_WAIT_FATAL;
+		else {
+			syslog(LOG_NOTICE, "dm_task_run failed, errno = %d, %s",
+			       ioctl_errno, strerror(ioctl_errno));
+			if (ioctl_errno == ENXIO) {
+				syslog(LOG_ERR, "%s disappeared, detaching",
+				       thread->device.name);
+				ret = DM_WAIT_FATAL;
+			}
 		}
 	}
+	DEBUGLOG("Completed waitevent task for %s", thread->device.uuid);

 	pthread_sigmask(SIG_SETMASK, &set, NULL);
 	_lock_mutex();
@@ -735,6 +776,7 @@ static void _monitor_unregister(void *arg)
 {
 	struct thread_status *thread = arg, *thread_iter;

+	DEBUGLOG("_monitor_unregister thread cleanup handler running");
 	if (!_do_unregister_device(thread))
 		syslog(LOG_ERR, "%s: %s unregister failed\n", __func__,
 		       thread->device.name);
@@ -760,6 +802,7 @@ static void _monitor_unregister(void *arg)
 			_unlock_mutex();
 			return;
 		}
+	DEBUGLOG("Marking Thr %x as DONE and unused.", (int)thread->thread);
 	thread->status = DM_THREAD_DONE;
 	UNLINK_THREAD(thread);
 	LINK(thread, &_thread_registry_unused);
@@ -863,6 +906,7 @@ static void *_monitor_thread(void *arg)
 		}
 	}

+	DEBUGLOG("Finished _monitor_thread");
 	pthread_cleanup_pop(1);

 	return NULL;
@@ -876,6 +920,7 @@ static int _create_thread(struct thread_status *thread)

 static int _terminate_thread(struct thread_status *thread)
 {
+	DEBUGLOG("Sending SIGALRM to terminate Thr %x.", (int)thread->thread);
 	return pthread_kill(thread->thread, SIGALRM);
 }

@@ -1099,6 +1144,7 @@ static int _unregister_for_event(struct message_data *message_data)
 	 * unlink and terminate its monitoring thread.
 	 */
 	if (!thread->events) {
+		DEBUGLOG("Marking Thr %x unused (no events).", (int)thread->thread);
 		UNLINK_THREAD(thread);
 		LINK(thread, &_thread_registry_unused);
 	}
@@ -1136,26 +1182,20 @@ static int _registered_device(struct message_data *message_data,
 }

 static int _want_registered_device(char *dso_name, char *device_uuid,
-				  struct thread_status *thread)
+				   struct thread_status *thread)
 {
 	/* If DSO names and device paths are equal. */
 	if (dso_name && device_uuid)
 		return !strcmp(dso_name, thread->dso_data->dso_name) &&
-		    !strcmp(device_uuid, thread->device.uuid) &&
-			(thread->status == DM_THREAD_RUNNING ||
-			 (thread->events & DM_EVENT_REGISTRATION_PENDING));
+		    !strcmp(device_uuid, thread->device.uuid);

 	/* If DSO names are equal. */
 	if (dso_name)
-		return !strcmp(dso_name, thread->dso_data->dso_name) &&
-			(thread->status == DM_THREAD_RUNNING ||
-			 (thread->events & DM_EVENT_REGISTRATION_PENDING));
+		return !strcmp(dso_name, thread->dso_data->dso_name);

 	/* If device paths are equal. */
 	if (device_uuid)
-		return !strcmp(device_uuid, thread->device.uuid) &&
-			(thread->status == DM_THREAD_RUNNING ||
-			 (thread->events & DM_EVENT_REGISTRATION_PENDING));
+		return !strcmp(device_uuid, thread->device.uuid);

 	return 1;
 }
@@ -1183,6 +1223,18 @@ static int _get_registered_dev(struct message_data *message_data, int next)
 	if (hit && !next)
 		goto reg;

+	/*
+	 * If we didn't get a match, try the threads waiting to be deleted.
+	 * FIXME Do something similar if 'next' is set.
+	 */
+	if (!hit && !next)
+		dm_list_iterate_items(thread, &_thread_registry_unused)
+			if (_want_registered_device(message_data->dso_name,
+						    message_data->device_uuid, thread)) {
+				hit = thread;
+				goto reg;
+			}
+
 	if (!hit)
 		goto out;

@@ -1518,6 +1570,8 @@ static void _process_request(struct dm_event_fifos *fifos)
 	if (!_client_read(fifos, &msg))
 		return;

+	DEBUGLOG("%s (0x%x) processing...", decode_cmd(msg.cmd), msg.cmd);
+
 	die = (msg.cmd == DM_EVENT_CMD_DIE) ? 1 : 0;

 	/* _do_process_request fills in msg (if memory allows for
@@ -1529,6 +1583,8 @@ static void _process_request(struct dm_event_fifos *fifos)

 	dm_free(msg.data);

+	DEBUGLOG("%s (0x%x) completed.", decode_cmd(msg.cmd), msg.cmd);
+
 	if (die) {
 		if (unlink(DMEVENTD_PIDFILE))
 			perror(DMEVENTD_PIDFILE ": unlink failed");
@@ -1577,10 +1633,8 @@ static void _cleanup_unused_threads(void)
 				if (ret == ESRCH) {
 					thread->status = DM_THREAD_DONE;
 				} else if (ret) {
-					syslog(LOG_ERR,
-					       "Unable to terminate thread: %s\n",
-					       strerror(-ret));
-					stack;
+					syslog(LOG_ERR, "Unable to terminate thread: %s",
+					       strerror(ret));
 				}
 				break;
 			}
@@ -1595,6 +1649,7 @@ static void _cleanup_unused_threads(void)
 		}

 		if (thread->status == DM_THREAD_DONE) {
+			DEBUGLOG("Destroying Thr %x.", (int)thread->thread);
 			dm_list_del(l);
 			_unlock_mutex();
 			join_ret = pthread_join(thread->thread, NULL);
@@ -1611,7 +1666,7 @@ static void _cleanup_unused_threads(void)

 static void _sig_alarm(int signum __attribute__((unused)))
 {
-	pthread_testcancel();
+	/* empty SIG_IGN */;
 }

 /* Init thread signal handling. */
@@ -1934,8 +1989,8 @@ static void restart(void)

 	if (version < 1) {
 		fprintf(stderr, "WARNING: The running dmeventd instance is too old.\n"
-			        "Protocol version %d (required: 1). Action cancelled.\n",
-			        version);
+				"Protocol version %d (required: 1). Action cancelled.\n",
+				version);
 		goto bad;
 	}

--- a/daemons/dmeventd/libdevmapper-event.c
+++ b/daemons/dmeventd/libdevmapper-event.c
@@ -17,15 +17,10 @@
 //#include "libmultilog.h"
 #include "dmeventd.h"

-#include <errno.h>
 #include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
 #include <sys/file.h>
 #include <sys/types.h>
 #include <sys/stat.h>
-#include <unistd.h>
 #include <sys/wait.h>
 #include <arpa/inet.h>		/* for htonl, ntohl */

--- a/daemons/dmeventd/plugins/lvm2/dmeventd_lvm.c
+++ b/daemons/dmeventd/plugins/lvm2/dmeventd_lvm.c
@@ -102,7 +102,8 @@ int dmeventd_lvm2_init(void)
 		goto out;

 	if (!_lvm_handle) {
-		lvm2_log_fn(_temporary_log_fn);
+		if (!getenv("LVM_LOG_FILE_EPOCH"))
+			lvm2_log_fn(_temporary_log_fn);
 		if (!(_lvm_handle = lvm2_init())) {
 			dm_pool_destroy(_mem_pool);
 			_mem_pool = NULL;
--- a/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c
+++ b/daemons/dmeventd/plugins/mirror/dmeventd_mirror.c
@@ -135,11 +135,21 @@ static int _remove_failed_devices(const char *device)
 #define CMD_SIZE 256	/* FIXME Use system restriction */
 	char cmd_str[CMD_SIZE];

+	if (!dmeventd_lvm2_command(dmeventd_lvm2_pool(), cmd_str, sizeof(cmd_str),
+				   "lvscan --cache", device))
+		return -1;
+
+	r = dmeventd_lvm2_run(cmd_str);
+
+	if (!r)
+		syslog(LOG_INFO, "Re-scan of mirror device %s failed.", device);
+
 	if (!dmeventd_lvm2_command(dmeventd_lvm2_pool(), cmd_str, sizeof(cmd_str),
 				  "lvconvert --config devices{ignore_suspended_devices=1} "
 				  "--repair --use-policies", device))
 		return -ENAMETOOLONG; /* FIXME Replace with generic error return - reason for failure has already got logged */

+	/* if repair goes OK, report success even if lvscan has failed */
 	r = dmeventd_lvm2_run(cmd_str);

 	syslog(LOG_INFO, "Repair of mirrored device %s %s.", device,
--- a/daemons/lvmetad/.gitignore
+++ b/daemons/lvmetad/.gitignore
@@ -0,0 +1,2 @@
+lvmetad
+lvmetactl
--- a/daemons/lvmetad/Makefile.in
+++ b/daemons/lvmetad/Makefile.in
@@ -18,7 +18,7 @@ top_builddir = @top_builddir@
 SOURCES = lvmetad-core.c
 SOURCES2 = testclient.c

-TARGETS = lvmetad lvmetad-testclient
+TARGETS = lvmetad lvmetactl

 .PHONY: install_lvmetad

@@ -39,8 +39,11 @@ CFLAGS += $(EXTRA_EXEC_CFLAGS)

 lvmetad: $(OBJECTS) $(top_builddir)/libdaemon/client/libdaemonclient.a \
 		    $(top_builddir)/libdaemon/server/libdaemonserver.a
-	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJECTS) \
-	$(DL_LIBS) $(LVMLIBS) $(LIBS) -rdynamic
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJECTS) $(LVMLIBS) $(LIBS)
+
+lvmetactl: lvmetactl.o $(top_builddir)/libdaemon/client/libdaemonclient.a \
+	$(top_builddir)/libdaemon/server/libdaemonserver.a
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ lvmetactl.o $(LVMLIBS)

 # TODO: No idea. No idea how to test either.
 #ifneq ("$(CFLOW_CMD)", "")
--- a/daemons/lvmetad/lvmetactl.c
+++ b/daemons/lvmetad/lvmetactl.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright (C) 2014 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ */
+
+#include "tool.h"
+
+#include "lvmetad-client.h"
+
+daemon_handle h;
+
+/*
+ * Print the "response", "status" and "reason" strings of a reply.
+ * Missing fields, or a reply with no parsed config tree, print as "".
+ */
+static void print_reply(daemon_reply reply)
+{
+	const char *a = NULL, *b = NULL, *c = NULL;
+
+	/* A failed request may leave reply.cft unset; don't look fields up in it. */
+	if (reply.cft) {
+		a = daemon_reply_str(reply, "response", NULL);
+		b = daemon_reply_str(reply, "status", NULL);
+		c = daemon_reply_str(reply, "reason", NULL);
+	}
+
+	printf("response \"%s\" status \"%s\" reason \"%s\"\n",
+	       a ? a : "", b ? b : "", c ? c : "");
+}
+
+/* Print the raw text of a reply; an unset buffer prints as an empty line. */
+static void print_buffer(daemon_reply reply)
+{
+	printf("%s\n", reply.buffer.mem ? reply.buffer.mem : "");
+}
+
+/*
+ * Send one request, selected by argv[1], to lvmetad and print the answer.
+ *
+ * Returns 0 once a request has been sent (whatever the daemon answered),
+ * -1 when the command is missing or unknown, or its arguments are missing.
+ */
+int main(int argc, char **argv)
+{
+	daemon_reply reply;
+	char *cmd;
+	char *uuid;
+	char *name;
+	int val;
+	int ver;
+	int ret = 0;
+
+	if (argc < 2) {
+		printf("lvmetactl dump\n");
+		printf("lvmetactl pv_list\n");
+		printf("lvmetactl vg_list\n");
+		printf("lvmetactl vg_lookup_name <name>\n");
+		printf("lvmetactl vg_lookup_uuid <uuid>\n");
+		printf("lvmetactl pv_lookup_uuid <uuid>\n");
+		printf("lvmetactl set_global_invalid 0|1\n");
+		printf("lvmetactl get_global_invalid\n");
+		printf("lvmetactl set_vg_version <uuid> <version>\n");
+		printf("lvmetactl vg_lock_type <uuid>\n");
+		return -1;
+	}
+
+	cmd = argv[1];
+
+	h = lvmetad_open(NULL);
+
+	if (!strcmp(cmd, "dump")) {
+		reply = daemon_send_simple(h, "dump",
+					   "token = %s", "skip",
+					   NULL);
+		print_buffer(reply);
+
+	} else if (!strcmp(cmd, "pv_list")) {
+		reply = daemon_send_simple(h, "pv_list",
+					   "token = %s", "skip",
+					   NULL);
+		print_buffer(reply);
+
+	} else if (!strcmp(cmd, "vg_list")) {
+		reply = daemon_send_simple(h, "vg_list",
+					   "token = %s", "skip",
+					   NULL);
+		print_buffer(reply);
+
+	} else if (!strcmp(cmd, "set_global_invalid")) {
+		if (argc < 3) {
+			printf("set_global_invalid 0|1\n");
+			ret = -1;
+			goto out_close;
+		}
+		val = atoi(argv[2]);
+
+		reply = daemon_send_simple(h, "set_global_info",
+					   "global_invalid = %d", val,
+					   "token = %s", "skip",
+					   NULL);
+		print_reply(reply);
+
+	} else if (!strcmp(cmd, "get_global_invalid")) {
+		reply = daemon_send_simple(h, "get_global_info",
+					   "token = %s", "skip",
+					   NULL);
+		print_buffer(reply);
+
+	} else if (!strcmp(cmd, "set_vg_version")) {
+		if (argc < 4) {
+			printf("set_vg_version <uuid> <ver>\n");
+			ret = -1;
+			goto out_close;
+		}
+		uuid = argv[2];
+		ver = atoi(argv[3]);
+
+		reply = daemon_send_simple(h, "set_vg_info",
+					   "uuid = %s", uuid,
+					   "version = %d", ver,
+					   "token = %s", "skip",
+					   NULL);
+		print_reply(reply);
+
+	} else if (!strcmp(cmd, "vg_lookup_name")) {
+		if (argc < 3) {
+			printf("vg_lookup_name <name>\n");
+			ret = -1;
+			goto out_close;
+		}
+		name = argv[2];
+
+		reply = daemon_send_simple(h, "vg_lookup",
+					   "name = %s", name,
+					   "token = %s", "skip",
+					   NULL);
+		print_buffer(reply);
+
+	} else if (!strcmp(cmd, "vg_lookup_uuid")) {
+		if (argc < 3) {
+			printf("vg_lookup_uuid <uuid>\n");
+			ret = -1;
+			goto out_close;
+		}
+		uuid = argv[2];
+
+		reply = daemon_send_simple(h, "vg_lookup",
+					   "uuid = %s", uuid,
+					   "token = %s", "skip",
+					   NULL);
+		print_buffer(reply);
+
+	} else if (!strcmp(cmd, "vg_lock_type")) {
+		struct dm_config_node *metadata;
+		const char *lock_type;
+
+		if (argc < 3) {
+			printf("vg_lock_type <uuid>\n");
+			ret = -1;
+			goto out_close;
+		}
+		uuid = argv[2];
+
+		reply = daemon_send_simple(h, "vg_lookup",
+					   "uuid = %s", uuid,
+					   "token = %s", "skip",
+					   NULL);
+
+		/* A reply with no parsed tree is reported like one without metadata. */
+		if (!reply.cft ||
+		    !(metadata = dm_config_find_node(reply.cft->root, "metadata"))) {
+			printf("no metadata\n");
+			goto out;
+		}
+
+		lock_type = dm_config_find_str(metadata, "metadata/lock_type", NULL);
+		if (!lock_type) {
+			printf("no lock_type\n");
+			goto out;
+		}
+		printf("lock_type %s\n", lock_type);
+
+	} else if (!strcmp(cmd, "pv_lookup_uuid")) {
+		if (argc < 3) {
+			printf("pv_lookup_uuid <uuid>\n");
+			ret = -1;
+			goto out_close;
+		}
+		uuid = argv[2];
+
+		reply = daemon_send_simple(h, "pv_lookup",
+					   "uuid = %s", uuid,
+					   "token = %s", "skip",
+					   NULL);
+		print_buffer(reply);
+
+	} else {
+		printf("unknown command\n");
+		ret = -1;
+		goto out_close;
+	}
+out:
+	daemon_reply_destroy(reply);
+out_close:
+	/* Every path that opened the handle leaves through here so it is always closed. */
+	daemon_close(h);
+	return ret;
+}
--- a/daemons/lvmetad/lvmetad-core.c
+++ b/daemons/lvmetad/lvmetad-core.c
@@ -14,23 +14,114 @@

 #define _XOPEN_SOURCE 500  /* pthread */

-#include "configure.h"
+#define _REENTRANT
+
+#include "tool.h"
+
 #include "daemon-io.h"
-#include "config-util.h"
 #include "daemon-server.h"
 #include "daemon-log.h"
 #include "lvm-version.h"

 #include <assert.h>
 #include <pthread.h>
-#include <stdint.h>
-#include <unistd.h>
-
-#include <math.h>  /* fabs() */
-#include <float.h> /* DBL_EPSILON */

 #define LVMETAD_SOCKET DEFAULT_RUN_DIR "/lvmetad.socket"

+/*
+ * valid/invalid state of cached metadata
+ *
+ * Normally when using lvmetad, the state is kept up-to-date through a
+ * combination of notifications from clients and updates triggered by uevents.
+ * When using lvmlockd, the lvmetad state is expected to become out of
+ * date (invalid/stale) when other hosts make changes to the metadata on disk.
+ *
+ * To deal with this, the metadata cached in lvmetad can be flagged as invalid.
+ * This invalid flag is returned along with the metadata when read by a
+ * command.  The command can check for the invalid flag and decide that it
+ * should either use the stale metadata (uncommon), or read the latest metadata
+ * from disk rather than using the invalid metadata that was returned.  If the
+ * command reads the latest metadata from disk, it can choose to send it to
+ * lvmetad to update the cached copy and clear the invalid flag in lvmetad.
+ * Otherwise, the next command to read the metadata from lvmetad will also
+ * receive the invalid metadata with the invalid flag (and like the previous
+ * command, it too may choose to read the latest metadata from disk and can
+ * then also choose to update the lvmetad copy.)
+ *
+ * For purposes of tracking the invalid state, LVM metadata is considered
+ * to be either VG-specific or global.  VG-specific metadata is metadata
+ * that is isolated to a VG, such as the LVs it contains.  Global
+ * metadata is metadata that is not isolated to a single VG.  Global
+ * metadata includes:
+ * . the VG namespace (which VG names are used)
+ * . the set of orphan PVs (which PVs are in VGs and which are not)
+ * . properties of orphan PVs (the size of an orphan PV)
+ *
+ * If the metadata for a single VG becomes invalid, the VGFL_INVALID
+ * flag can be set in the vg_info struct for that VG.  If the global
+ * metadata becomes invalid, the GLFL_INVALID flag can be set in the
+ * lvmetad daemon state.
+ *
+ * If a command reads VG metadata and VGFL_INVALID is set, an
+ * extra config node called "vg_invalid" is added to the config
+ * data returned to the command.
+ *
+ * If a command reads global metadata and GLFL_INVALID is set, an
+ * extra config node called "global_invalid" is added to the
+ * config data returned to the command.
+ *
+ * If a command sees vg_invalid, and wants the latest VG metadata,
+ * it only needs to scan disks of the PVs in that VG.
+ * It can then use vg_update to send the latest metadata to lvmetad
+ * which clears the VGFL_INVALID flag.
+ *
+ * If a command sees global_invalid, and wants the latest metadata,
+ * it should scan all devices to update lvmetad, and then send
+ * lvmetad the "set_global_info global_invalid=0" message to clear
+ * GLFL_INVALID.
+ *
+ * (When rescanning devices to update lvmetad, the command must use
+ * the global filter cmd->lvmetad_filter so that it processes the same
+ * devices that are seen by lvmetad.)
+ *
+ * The lvmetad INVALID flags can be set by sending lvmetad the messages:
+ *
+ * . set_vg_info with the latest VG seqno.  If the VG seqno is larger
+ *   than the cached VG seqno, VGFL_INVALID is set for the VG.
+ *
+ * . set_global_info with global_invalid=1 sets GLFL_INVALID.
+ *
+ * Different entities could use these functions to invalidate metadata
+ * if/when they detected that the cache is stale.  How they detect that
+ * the cache is stale depends on the details of the specific entity.
+ *
+ * In the case of lvmlockd, it embeds values into its locks to keep track
+ * of when other nodes have changed metadata on disk related to those locks.
+ * When acquiring locks it can look at these values and detect that
+ * the metadata associated with the lock has been changed.
+ * When the values change, it uses set_vg_info/set_global_info to
+ * invalidate the lvmetad cache.
+ *
+ * The values that lvmlockd distributes through its locks are the
+ * latest VG seqno in VG locks and a global counter in the global lock.
+ * When a host acquires a VG lock and sees that the embedded seqno is
+ * larger than it was previously, it knows that it should invalidate the
+ * lvmetad cache for the VG.  If the host acquires the global lock
+ * and sees that the counter is larger than previously, it knows that
+ * it should invalidate the global info in lvmetad.  This invalidation
+ * is done before the lock is returned to the command.  This way the
+ * invalid flag will be set on the metadata before the command reads
+ * it from lvmetad.
+ */
+
+struct vg_info {
+	int64_t external_version; /* vg_info_update() clears VGFL_INVALID once the cached "metadata/seqno" is >= this */
+	uint32_t flags; /* VGFL_ */
+};
+
+#define GLFL_INVALID 0x00000001
+#define VGFL_INVALID 0x00000001
+
 typedef struct {
 	log_state *log; /* convenience */
 	const char *log_config;
@@ -40,6 +131,8 @@ typedef struct {

 	struct dm_hash_table *vgid_to_metadata;
 	struct dm_hash_table *vgid_to_vgname;
+	struct dm_hash_table *vgid_to_outdated_pvs;
+	struct dm_hash_table *vgid_to_info;
 	struct dm_hash_table *vgname_to_vgid;
 	struct dm_hash_table *pvid_to_vgid;
 	struct {
@@ -50,6 +143,7 @@ typedef struct {
 		pthread_mutex_t pvid_to_vgid;
 	} lock;
 	char token[128];
+	uint32_t flags; /* GLFL_ */
 	pthread_mutex_t token_lock;
 } lvmetad_state;

@@ -60,17 +154,19 @@ static void destroy_metadata_hashes(lvmetad_state *s)
 	dm_hash_iterate(n, s->vgid_to_metadata)
 		dm_config_destroy(dm_hash_get_data(s->vgid_to_metadata, n));

+	dm_hash_iterate(n, s->vgid_to_outdated_pvs)
+		dm_config_destroy(dm_hash_get_data(s->vgid_to_outdated_pvs, n));
+
 	dm_hash_iterate(n, s->pvid_to_pvmeta)
 		dm_config_destroy(dm_hash_get_data(s->pvid_to_pvmeta, n));

 	dm_hash_destroy(s->pvid_to_pvmeta);
 	dm_hash_destroy(s->vgid_to_metadata);
 	dm_hash_destroy(s->vgid_to_vgname);
+	dm_hash_destroy(s->vgid_to_outdated_pvs);
+	dm_hash_destroy(s->vgid_to_info);
 	dm_hash_destroy(s->vgname_to_vgid);

-	dm_hash_iterate(n, s->device_to_pvid)
-		dm_free(dm_hash_get_data(s->device_to_pvid, n));
-
 	dm_hash_destroy(s->device_to_pvid);
 	dm_hash_destroy(s->pvid_to_vgid);
 }
@@ -81,6 +177,8 @@ static void create_metadata_hashes(lvmetad_state *s)
 	s->device_to_pvid = dm_hash_create(32);
 	s->vgid_to_metadata = dm_hash_create(32);
 	s->vgid_to_vgname = dm_hash_create(32);
+	s->vgid_to_outdated_pvs = dm_hash_create(32);
+	s->vgid_to_info = dm_hash_create(32);
 	s->pvid_to_vgid = dm_hash_create(32);
 	s->vgname_to_vgid = dm_hash_create(32);
 }
@@ -244,6 +342,30 @@ static int update_pv_status(lvmetad_state *s,
 	return complete;
 }

+/*
+ * Append a new, valueless node named node_name after the last sibling of
+ * cft->root.  Returns the new node, or NULL if it could not be created.
+ */
+static struct dm_config_node *add_last_node(struct dm_config_tree *cft, const char *node_name)
+{
+	struct dm_config_node *tail, *added;
+
+	/* Locate the current end of the root's sibling chain. */
+	for (tail = cft->root; tail->sib; tail = tail->sib)
+		;
+
+	if (!(added = dm_config_create_node(cft, node_name)))
+		return NULL;
+
+	/* The new node ends the chain; its parent is recorded as cft->root. */
+	added->v = NULL;
+	added->sib = NULL;
+	added->parent = cft->root;
+	tail->sib = added;
+
+	return added;
+}
+
 static struct dm_config_node *make_pv_node(lvmetad_state *s, const char *pvid,
 					   struct dm_config_tree *cft,
 					   struct dm_config_node *parent,
@@ -307,6 +429,9 @@ static response pv_list(lvmetad_state *s, request r)
 		cn = make_pv_node(s, id, res.cft, cn_pvs, cn);
 	}

+	if (s->flags & GLFL_INVALID)
+		add_last_node(res.cft, "global_invalid");
+
 	unlock_pvid_to_pvmeta(s);

 	return res;
@@ -351,6 +476,9 @@ static response pv_lookup(lvmetad_state *s, request r)
 	pv->key = "physical_volume";
 	unlock_pvid_to_pvmeta(s);

+	if (s->flags & GLFL_INVALID)
+		add_last_node(res.cft, "global_invalid");
+
 	return res;
 }

@@ -419,14 +547,87 @@ static response vg_list(lvmetad_state *s, request r)
 	}

 	unlock_vgid_to_metadata(s);
+
+	if (s->flags & GLFL_INVALID)
+		add_last_node(res.cft, "global_invalid");
 bad:
 	return res;
 }

+/* Record pvid on vgid's outdated-PV list (created on first use) if the PV has a metadata area in use. */
+static void mark_outdated_pv(lvmetad_state *s, const char *vgid, const char *pvid)
+{
+	struct dm_config_tree *pvmeta, *outdated_pvs;
+	struct dm_config_node *list, *cft_vgid;
+	struct dm_config_value *v;
+
+	lock_pvid_to_pvmeta(s);
+	pvmeta = dm_hash_lookup(s->pvid_to_pvmeta, pvid);
+	unlock_pvid_to_pvmeta(s);
+
+	/* if the MDA exists and is used, it will have ignore=0 set */
+	if (!pvmeta ||
+	    (dm_config_find_int64(pvmeta->root, "pvmeta/mda0/ignore", 1) &&
+	     dm_config_find_int64(pvmeta->root, "pvmeta/mda1/ignore", 1)))
+		return;
+
+	WARN(s, "PV %s has outdated metadata", pvid);
+
+	outdated_pvs = dm_hash_lookup(s->vgid_to_outdated_pvs, vgid);
+	if (!outdated_pvs) {
+		if (!(outdated_pvs = dm_config_from_string("outdated_pvs/pv_list = []")) ||
+		    !(cft_vgid = make_text_node(outdated_pvs, "vgid", dm_pool_strdup(outdated_pvs->mem, vgid),
+						outdated_pvs->root, NULL)))
+			abort();
+		if (!dm_hash_insert(s->vgid_to_outdated_pvs, cft_vgid->v->v.str, outdated_pvs))
+			abort();
+		DEBUGLOG(s, "created outdated_pvs list for VG %s", vgid);
+	}
+
+	/* A missing list or a failed strdup aborts, like the other failures above; a NULL entry would crash later. */
+	if (!(list = dm_config_find_node(outdated_pvs->root, "outdated_pvs/pv_list")))
+		abort();
+	for (v = list->v; v; v = v->next)
+		if (v->type != DM_CFG_EMPTY_ARRAY && !strcmp(v->v.str, pvid))
+			return;
+	if (!(v = dm_config_create_value(outdated_pvs)) ||
+	    !(v->v.str = dm_pool_strdup(outdated_pvs->mem, pvid)))
+		abort();
+	v->type = DM_CFG_STRING;
+	v->next = list->v;
+	list->v = v;
+}
+
+/* Clone the pvmeta of each PV on vgid's outdated list into a new "outdated_pvs" node placed first among metadata's children. */
+static void chain_outdated_pvs(lvmetad_state *s, const char *vgid, struct dm_config_tree *metadata_cft, struct dm_config_node *metadata)
+{
+	struct dm_config_tree *outdated = dm_hash_lookup(s->vgid_to_outdated_pvs, vgid), *pvmeta;
+	struct dm_config_node *clone, *holder, *pv_list;
+	struct dm_config_value *entry;
+	if (!outdated || !(pv_list = dm_config_find_node(outdated->root, "outdated_pvs/pv_list")) || !(entry = pv_list->v))
+		return;
+	if (!(holder = make_config_node(metadata_cft, "outdated_pvs", metadata_cft->root, 0)))
+		return; /* oops */
+	holder->sib = metadata->child;
+	metadata->child = holder;
+	for (; entry && entry->type != DM_CFG_EMPTY_ARRAY; entry = entry->next) {
+		if (!(pvmeta = dm_hash_lookup(s->pvid_to_pvmeta, entry->v.str))) {
+			WARN(s, "metadata for PV %s not found", entry->v.str);
+			continue;
+		}
+		if (!(clone = dm_config_clone_node(metadata_cft, pvmeta->root, 0)))
+			continue;
+		clone->key = dm_config_find_str(clone, "pvmeta/id", NULL);
+		clone->sib = holder->child;
+		holder->child = clone;
+	}
+}
+
 static response vg_lookup(lvmetad_state *s, request r)
 {
 	struct dm_config_tree *cft;
 	struct dm_config_node *metadata, *n;
+	struct vg_info *info;
 	response res = { 0 };

 	const char *uuid = daemon_request_str(r, "uuid", NULL);
@@ -489,6 +690,17 @@ static response vg_lookup(lvmetad_state *s, request r)
 	unlock_vg(s, uuid);

 	update_pv_status(s, res.cft, n, 1); /* FIXME report errors */
+	chain_outdated_pvs(s, uuid, res.cft, n);
+
+        if (s->flags & GLFL_INVALID)
+                add_last_node(res.cft, "global_invalid");
+
+	info = dm_hash_lookup(s->vgid_to_info, uuid);
+	if (info && (info->flags & VGFL_INVALID)) {
+		n = add_last_node(res.cft, "vg_invalid");
+		if (!n)
+			goto bad;
+	}

 	return res;
 bad:
@@ -496,65 +708,13 @@ bad:
 	return reply_fail("out of memory");
 }

-/* Test if the doubles are close enough to be considered equal */
-static int close_enough(double d1, double d2)
-{
-	return fabs(d1 - d2) < DBL_EPSILON;
-}
-
-static int compare_value(struct dm_config_value *a, struct dm_config_value *b)
-{
-	int r = 0;
-
-	if (a->type > b->type)
-		return 1;
-	if (a->type < b->type)
-		return -1;
-
-	switch (a->type) {
-	case DM_CFG_STRING: r = strcmp(a->v.str, b->v.str); break;
-	case DM_CFG_FLOAT: r = close_enough(a->v.f, b->v.f) ? 0 : (a->v.f > b->v.f) ? 1 : -1; break;
-	case DM_CFG_INT: r = (a->v.i == b->v.i) ? 0 : (a->v.i > b->v.i) ? 1 : -1; break;
-	case DM_CFG_EMPTY_ARRAY: return 0;
-	}
-
-	if (r == 0 && a->next && b->next)
-		r = compare_value(a->next, b->next);
-	return r;
-}
-
-static int compare_config(struct dm_config_node *a, struct dm_config_node *b)
-{
-	int result = 0;
-	if (a->v && b->v)
-		result = compare_value(a->v, b->v);
-	if (a->v && !b->v)
-		result = 1;
-	if (!a->v && b->v)
-		result = -1;
-	if (a->child && b->child)
-		result = compare_config(a->child, b->child);
-
-	if (result) {
-		// DEBUGLOG("config inequality at %s / %s", a->key, b->key);
-		return result;
-	}
-
-	if (a->sib && b->sib)
-		result = compare_config(a->sib, b->sib);
-	if (a->sib && !b->sib)
-		result = 1;
-	if (!a->sib && b->sib)
-		result = -1;
-
-	return result;
-}
-
 static int vg_remove_if_missing(lvmetad_state *s, const char *vgid, int update_pvids);

+enum update_pvid_mode { UPDATE_ONLY, REMOVE_EMPTY, MARK_OUTDATED };
+
 /* You need to be holding the pvid_to_vgid lock already to call this. */
 static int update_pvid_to_vgid(lvmetad_state *s, struct dm_config_tree *vg,
-			       const char *vgid, int nuke_empty)
+			       const char *vgid, int mode)
 {
 	struct dm_config_node *pv;
 	struct dm_hash_table *to_check;
@@ -574,11 +734,14 @@ static int update_pvid_to_vgid(lvmetad_state *s, struct dm_config_tree *vg,
 		if (!(pvid = dm_config_find_str(pv->child, "id", NULL)))
 			continue;

-		if (nuke_empty &&
+		if (mode == REMOVE_EMPTY &&
 		    (vgid_old = dm_hash_lookup(s->pvid_to_vgid, pvid)) &&
 		    !dm_hash_insert(to_check, vgid_old, (void*) 1))
 			goto out;

+		if (mode == MARK_OUTDATED)
+			mark_outdated_pv(s, vgid, pvid);
+
 		if (!dm_hash_insert(s->pvid_to_vgid, pvid, (void*) vgid))
 			goto out;

@@ -602,10 +765,11 @@ static int update_pvid_to_vgid(lvmetad_state *s, struct dm_config_tree *vg,
 /* A pvid map lock needs to be held if update_pvids = 1. */
 static int remove_metadata(lvmetad_state *s, const char *vgid, int update_pvids)
 {
-	struct dm_config_tree *old;
+	struct dm_config_tree *old, *outdated_pvs;
 	const char *oldname;
 	lock_vgid_to_metadata(s);
 	old = dm_hash_lookup(s->vgid_to_metadata, vgid);
+	outdated_pvs = dm_hash_lookup(s->vgid_to_outdated_pvs, vgid);
 	oldname = dm_hash_lookup(s->vgid_to_vgname, vgid);

 	if (!old) {
@@ -619,12 +783,15 @@ static int remove_metadata(lvmetad_state *s, const char *vgid, int update_pvids)
 	dm_hash_remove(s->vgid_to_metadata, vgid);
 	dm_hash_remove(s->vgid_to_vgname, vgid);
 	dm_hash_remove(s->vgname_to_vgid, oldname);
+	dm_hash_remove(s->vgid_to_outdated_pvs, vgid);
 	unlock_vgid_to_metadata(s);

 	if (update_pvids)
 		/* FIXME: What should happen when update fails */
 		update_pvid_to_vgid(s, old, "#orphan", 0);
 	dm_config_destroy(old);
+	if (outdated_pvs)
+		dm_config_destroy(outdated_pvs);
 	return 1;
 }

@@ -668,7 +835,7 @@ static int vg_remove_if_missing(lvmetad_state *s, const char *vgid, int update_p
 * this function, so they can be safely destroyed after update_metadata returns
 * (anything that might have been retained is copied). */
 static int update_metadata(lvmetad_state *s, const char *name, const char *_vgid,
-			   struct dm_config_node *metadata, int64_t *oldseq)
+			   struct dm_config_node *metadata, int64_t *oldseq, const char *pvid)
 {
 	struct dm_config_tree *cft = NULL;
 	struct dm_config_tree *old;
@@ -717,6 +884,10 @@ static int update_metadata(lvmetad_state *s, const char *name, const char *_vgid

 	if (seq < haveseq) {
 		DEBUGLOG(s, "Refusing to update metadata for %s (at %d) to %d", _vgid, haveseq, seq);
+
+		if (pvid)
+			mark_outdated_pv(s, dm_config_find_str(old->root, "metadata/id", NULL), pvid);
+
 		/* TODO: notify the client that their metadata is out of date? */
 		retval = 1;
 		goto out;
@@ -739,6 +910,8 @@ static int update_metadata(lvmetad_state *s, const char *name, const char *_vgid

 	if (haveseq >= 0 && haveseq < seq) {
 		INFO(s, "Updating metadata for %s at %d to %d", _vgid, haveseq, seq);
+		if (oldseq)
+			update_pvid_to_vgid(s, old, vgid, MARK_OUTDATED);
 		/* temporarily orphan all of our PVs */
 		update_pvid_to_vgid(s, old, "#orphan", 0);
 	}
@@ -773,12 +946,46 @@ out: /* FIXME: We should probably abort() on partial failures. */
 	return retval;
 }

+/* Forget 'device' in pvmeta; if it was the primary, the first alternate (if any) is promoted to "pvmeta/device".
+ * Returns the promoted alternate, the unchanged primary if 'device' was not it, else 0. */
+static dev_t device_remove(lvmetad_state *s, struct dm_config_tree *pvmeta, dev_t device)
+{
+	struct dm_config_node *pvmeta_tmp;
+	struct dm_config_value *v = NULL;
+	dev_t alt_device = 0, prim_device = 0;
+
+	if ((pvmeta_tmp = dm_config_find_node(pvmeta->root, "pvmeta/devices_alternate")))
+		v = pvmeta_tmp->v; /* head of the alternates list as it was on entry */
+	prim_device = dm_config_find_int64(pvmeta->root, "pvmeta/device", 0);
+	/* it is the primary device */
+	if (device > 0 && device == prim_device && pvmeta_tmp && pvmeta_tmp->v)
+	{
+		alt_device = pvmeta_tmp->v->v.i;
+		pvmeta_tmp->v = pvmeta_tmp->v->next; /* pop the first alternate */
+		pvmeta_tmp = dm_config_find_node(pvmeta->root, "pvmeta/device"); /* from here on pvmeta_tmp is the primary node */
+		pvmeta_tmp->v->v.i = alt_device;
+	} else if (device != prim_device)
+		alt_device = prim_device;
+
+	/* it is an alternate device */
+	if (device > 0 && v && v->v.i == device)
+		pvmeta_tmp->v = v->next; /* unlink the list head */
+	else while (device > 0 && pvmeta_tmp && v) {
+		if (v->next && v->next->v.i == device)
+			v->next = v->next->next; /* unlink a later entry */
+		v = v->next;
+	}
+
+	return alt_device;
+}
+
 static response pv_gone(lvmetad_state *s, request r)
 {
 	const char *pvid = daemon_request_str(r, "uuid", NULL);
 	int64_t device = daemon_request_int(r, "device", 0);
+	int64_t alt_device = 0;
 	struct dm_config_tree *pvmeta;
-	char *pvid_old, *vgid;
+	char *vgid;

 	DEBUGLOG(s, "pv_gone: %s / %" PRIu64, pvid, device);

@@ -792,15 +999,18 @@ static response pv_gone(lvmetad_state *s, request r)

 	DEBUGLOG(s, "pv_gone (updated): %s / %" PRIu64, pvid, device);

-	pvmeta = dm_hash_lookup(s->pvid_to_pvmeta, pvid);
-	pvid_old = dm_hash_lookup_binary(s->device_to_pvid, &device, sizeof(device));
+	if (!(pvmeta = dm_hash_lookup(s->pvid_to_pvmeta, pvid)))
+		return reply_unknown("PVID does not exist");
 	vgid = dm_hash_lookup(s->pvid_to_vgid, pvid);

 	dm_hash_remove_binary(s->device_to_pvid, &device, sizeof(device));
-	dm_hash_remove(s->pvid_to_pvmeta, pvid);
-	unlock_pvid_to_pvmeta(s);

-	dm_free(pvid_old);
+	if (!(alt_device = device_remove(s, pvmeta, device)))
+		dm_hash_remove(s->pvid_to_pvmeta, pvid);
+
+	DEBUGLOG(s, "pv_gone alt_device = %" PRIu64, alt_device);
+
+	unlock_pvid_to_pvmeta(s);

 	if (vgid) {
 		if (!(vgid = dm_strdup(vgid)))
@@ -812,12 +1022,15 @@ static response pv_gone(lvmetad_state *s, request r)
 		dm_free(vgid);
 	}

-	if (!pvmeta)
-		return reply_unknown("PVID does not exist");
+	if (!alt_device)
+		dm_config_destroy(pvmeta);

-	dm_config_destroy(pvmeta);
-
-	return daemon_reply_simple("OK", NULL);
+	if (alt_device) {
+		return daemon_reply_simple("OK",
+					   "device = %"PRId64, alt_device,
+					   NULL);
+	} else
+		return daemon_reply_simple("OK", NULL );
 }

 static response pv_clear_all(lvmetad_state *s, request r)
@@ -845,11 +1058,11 @@ static response pv_found(lvmetad_state *s, request r)
 	const char *vgname = daemon_request_str(r, "vgname", NULL);
 	const char *vgid = daemon_request_str(r, "metadata/id", NULL);
 	const char *vgid_old = NULL;
-	struct dm_config_node *pvmeta = dm_config_find_node(r.cft->root, "pvmeta");
+	struct dm_config_node *pvmeta = dm_config_find_node(r.cft->root, "pvmeta"), *altdev = NULL;
+	struct dm_config_value *altdev_v;
 	uint64_t device, device_old_pvid = 0;
 	struct dm_config_tree *cft, *pvmeta_old_dev = NULL, *pvmeta_old_pvid = NULL;
 	char *old;
-	char *pvid_dup;
 	int complete = 0, orphan = 0;
 	int64_t seqno = -1, seqno_old = -1, changed = 0;

@@ -861,12 +1074,8 @@ static response pv_found(lvmetad_state *s, request r)
 	if (!dm_config_get_uint64(pvmeta, "pvmeta/device", &device))
 		return reply_fail("need PV device number");

-	if (!(cft = dm_config_create()) ||
-	    (!(pvid_dup = dm_strdup(pvid)))) {
-		if (cft)
-			dm_config_destroy(cft);
+	if (!(cft = dm_config_create()))
 		return reply_fail("out of memory");
-	}

 	lock_pvid_to_pvmeta(s);

@@ -875,7 +1084,6 @@ static response pv_found(lvmetad_state *s, request r)

 	if ((old = dm_hash_lookup_binary(s->device_to_pvid, &device, sizeof(device)))) {
 		pvmeta_old_dev = dm_hash_lookup(s->pvid_to_pvmeta, old);
-		dm_hash_remove(s->pvid_to_pvmeta, old);
 		vgid_old = dm_hash_lookup(s->pvid_to_vgid, old);
 	}

@@ -885,35 +1093,69 @@ static response pv_found(lvmetad_state *s, request r)
 	if (!(cft->root = dm_config_clone_node(cft, pvmeta, 0)))
                goto out_of_mem;

+	pvid = dm_config_find_str(cft->root, "pvmeta/id", NULL);
+
 	if (!pvmeta_old_pvid || compare_config(pvmeta_old_pvid->root, cft->root))
 		changed |= 1;

 	if (pvmeta_old_pvid && device != device_old_pvid) {
-		DEBUGLOG(s, "pv %s no longer on device %" PRIu64, pvid, device_old_pvid);
-		dm_free(dm_hash_lookup_binary(s->device_to_pvid, &device_old_pvid, sizeof(device_old_pvid)));
+		DEBUGLOG(s, "PV %s duplicated on device %" PRIu64, pvid, device_old_pvid);
 		dm_hash_remove_binary(s->device_to_pvid, &device_old_pvid, sizeof(device_old_pvid));
+		if (!dm_hash_insert_binary(s->device_to_pvid, &device_old_pvid,
+					   sizeof(device_old_pvid), (void*)pvid))
+			goto out_of_mem;
+		if ((altdev = dm_config_find_node(pvmeta_old_pvid->root, "pvmeta/devices_alternate"))) {
+			altdev = dm_config_clone_node(cft, altdev, 0);
+			chain_node(altdev, cft->root, 0);
+		} else
+			if (!(altdev = make_config_node(cft, "devices_alternate", cft->root, 0)))
+				goto out_of_mem;
+                altdev_v = altdev->v;
+                while (1) {
+			if (altdev_v && altdev_v->v.i == device_old_pvid)
+				break;
+			if (altdev_v)
+				altdev_v = altdev_v->next;
+			if (!altdev_v) {
+				if (!(altdev_v = dm_config_create_value(cft)))
+					goto out_of_mem;
+				altdev_v->next = altdev->v;
+				altdev->v = altdev_v;
+				altdev->v->v.i = device_old_pvid;
+				break;
+			}
+		};
+		altdev_v = altdev->v;
+		while (altdev_v) {
+			if (altdev_v->next && altdev_v->next->v.i == device)
+				altdev_v->next = altdev_v->next->next;
+			altdev_v = altdev_v->next;
+		}
 		changed |= 1;
 	}

 	if (!dm_hash_insert(s->pvid_to_pvmeta, pvid, cft) ||
-	    !dm_hash_insert_binary(s->device_to_pvid, &device, sizeof(device), (void*)pvid_dup)) {
+	    !dm_hash_insert_binary(s->device_to_pvid, &device, sizeof(device), (void*)pvid)) {
 		dm_hash_remove(s->pvid_to_pvmeta, pvid);
 out_of_mem:
 		unlock_pvid_to_pvmeta(s);
 		dm_config_destroy(cft);
-		dm_free(pvid_dup);
 		dm_free(old);
 		return reply_fail("out of memory");
 	}

 	unlock_pvid_to_pvmeta(s);

-	dm_free(old);
-
 	if (pvmeta_old_pvid)
 		dm_config_destroy(pvmeta_old_pvid);
-	if (pvmeta_old_dev && pvmeta_old_dev != pvmeta_old_pvid)
-		dm_config_destroy(pvmeta_old_dev);
+	if (pvmeta_old_dev && pvmeta_old_dev != pvmeta_old_pvid) {
+		dev_t d = dm_config_find_int64(pvmeta_old_dev->root, "pvmeta/device", 0);
+		WARN(s, "pv_found: stray device %"PRId64, d);
+		if (!device_remove(s, pvmeta_old_dev, device)) {
+			dm_hash_remove(s->pvid_to_pvmeta, old);
+			dm_config_destroy(pvmeta_old_dev);
+		}
+	}

 	if (metadata) {
 		if (!vgid)
@@ -924,7 +1166,7 @@ out_of_mem:
 		if (daemon_request_int(r, "metadata/seqno", -1) < 0)
 			return reply_fail("need VG seqno");

-		if (!update_metadata(s, vgname, vgid, metadata, &seqno_old))
+		if (!update_metadata(s, vgname, vgid, metadata, &seqno_old, pvid))
 			return reply_fail("metadata update failed");
 		changed |= (seqno_old != dm_config_find_int(metadata, "metadata/seqno", -1));
 	} else {
@@ -972,6 +1214,39 @@ out_of_mem:
 				   NULL);
 }

+/*
+ * Forget the outdated-PV config tree cached for a VG.
+ *
+ * The VG is named by the "vgid" field of the request; the request fails
+ * when that field is absent.  If a tree is cached for the VG it is
+ * destroyed and its hash entry removed; otherwise nothing changes.
+ */
+static response vg_clear_outdated_pvs(lvmetad_state *s, request r)
+{
+	const char *vgid = daemon_request_str(r, "vgid", NULL);
+	struct dm_config_tree *stale;
+
+	if (!vgid)
+		return reply_fail("need VG UUID");
+
+	stale = dm_hash_lookup(s->vgid_to_outdated_pvs, vgid);
+	if (stale) {
+		dm_config_destroy(stale);
+		dm_hash_remove(s->vgid_to_outdated_pvs, vgid);
+	}
+
+	return daemon_reply_simple("OK", NULL);
+}
+
+/*
+ * Clear VGFL_INVALID on a VG's info entry once the supplied metadata is
+ * at least as new as the externally reported version.
+ *
+ * Nothing is done when the metadata carries no "metadata/seqno" or when
+ * no info entry exists for uuid.
+ */
+static void vg_info_update(lvmetad_state *s, const char *uuid,
+                           struct dm_config_node *metadata)
+{
+	struct vg_info *info;
+	int64_t seqno = dm_config_find_int64(metadata, "metadata/seqno", -1);
+
+	if (seqno == -1)
+		return;
+
+	if (!(info = (struct vg_info *) dm_hash_lookup(s->vgid_to_info, uuid)))
+		return;
+
+	if (info->external_version <= seqno)
+		info->flags &= ~VGFL_INVALID;
+}
+
 static response vg_update(lvmetad_state *s, request r)
 {
 	struct dm_config_node *metadata = dm_config_find_node(r.cft->root, "metadata");
@@ -987,8 +1262,10 @@ static response vg_update(lvmetad_state *s, request r)

 		/* TODO defer metadata update here; add a separate vg_commit
 		 * call; if client does not commit, die */
-		if (!update_metadata(s, vgname, vgid, metadata, NULL))
+		if (!update_metadata(s, vgname, vgid, metadata, NULL, NULL))
 			return reply_fail("metadata update failed");
+
+		vg_info_update(s, vgid, metadata);
 	}
 	return daemon_reply_simple("OK", NULL);
 }
@@ -1009,6 +1286,71 @@ static response vg_remove(lvmetad_state *s, request r)
 	return daemon_reply_simple("OK", NULL);
 }

+/*
+ * Set or clear GLFL_INVALID in the daemon state according to the
+ * "global_invalid" request field: 1 sets the flag, 0 clears it and any
+ * other value (including a missing field) leaves it unchanged.
+ * The reply is always "OK".
+ */
+static response set_global_info(lvmetad_state *s, request r)
+{
+	const int global_invalid = daemon_request_int(r, "global_invalid", -1);
+
+	switch (global_invalid) {
+	case 1:
+		s->flags |= GLFL_INVALID;
+		break;
+	case 0:
+		s->flags &= ~GLFL_INVALID;
+		break;
+	default:
+		/* field missing or unrecognised: leave the flag alone */
+		break;
+	}
+
+	return daemon_reply_simple("OK", NULL);
+}
+
+/*
+ * Report whether GLFL_INVALID is set in the daemon state, as
+ * "global_invalid = 1" or "global_invalid = 0".  The request is not read.
+ */
+static response get_global_info(lvmetad_state *s, request r)
+{
+	const int invalid = (s->flags & GLFL_INVALID) ? 1 : 0;
+
+	return daemon_reply_simple("OK", "global_invalid = %d", invalid, NULL);
+}
+
+/*
+ * Record an externally reported VG version and mark the cached VG invalid.
+ *
+ * Request fields: "uuid" (VG id) and "version".  The request is a no-op
+ * answered with "OK" when either field is missing, when the VG is not in
+ * vgid_to_metadata, or when the cached "metadata/seqno" is already
+ * >= version.  A version of 0 invalidates unconditionally.
+ *
+ * Otherwise the VG's vg_info entry (created on first use) gets
+ * external_version = version and VGFL_INVALID set.
+ *
+ * Fails with "out of memory" if the entry cannot be allocated or inserted.
+ */
+static response set_vg_info(lvmetad_state *s, request r)
+{
+	struct dm_config_tree *vg;
+	struct vg_info *info;
+	const char *uuid = daemon_request_str(r, "uuid", NULL);
+	const int64_t new_version = daemon_request_int(r, "version", -1);
+	int64_t cache_version;
+
+	if (!uuid)
+		goto out;
+
+	if (new_version == -1)
+		goto out;
+
+	vg = dm_hash_lookup(s->vgid_to_metadata, uuid);
+	if (!vg)
+		goto out;
+
+	if (!new_version)
+		goto inval;
+
+	cache_version = dm_config_find_int64(vg->root, "metadata/seqno", -1);
+
+	/* new_version == -1 was already rejected above */
+	if (cache_version != -1 && cache_version >= new_version)
+		goto out;
+inval:
+	info = dm_hash_lookup(s->vgid_to_info, uuid);
+	if (!info) {
+		info = malloc(sizeof(struct vg_info));
+		if (!info)
+			goto bad;
+		memset(info, 0, sizeof(struct vg_info));
+		if (!dm_hash_insert(s->vgid_to_info, uuid, (void*)info)) {
+			/* not owned by the hash table: release it here */
+			free(info);
+			goto bad;
+		}
+	}
+
+	info->external_version = new_version;
+	info->flags |= VGFL_INVALID;
+
+out:
+	return daemon_reply_simple("OK", NULL);
+bad:
+	return reply_fail("out of memory");
+}
+
 static void _dump_cft(struct buffer *buf, struct dm_hash_table *ht, const char *key_addr)
 {
 	struct dm_hash_node *n;
@@ -1035,7 +1377,7 @@ static void _dump_pairs(struct buffer *buf, struct dm_hash_table *ht, const char
 			   *val = dm_hash_get_data(ht, n);
 		buffer_append(buf, "    ");
 		if (int_key)
-			(void) dm_asprintf(&append, "%d = \"%s\"", *(int*)key, val);
+			(void) dm_asprintf(&append, "%d = \"%s\"", *(const int*)key, val);
 		else
 			(void) dm_asprintf(&append, "%s = \"%s\"", key, val);
 		if (append)
@@ -1046,6 +1388,52 @@ static void _dump_pairs(struct buffer *buf, struct dm_hash_table *ht, const char
 	buffer_append(buf, "}\n");
 }

+/*
+ * Append a "<name> { ... }" section to buf with one
+ * "<key> = <external_version>" line per entry of ht, whose values are
+ * struct vg_info.  int_key is accepted but not used.
+ */
+static void _dump_info_version(struct buffer *buf, struct dm_hash_table *ht, const char *name, int int_key)
+{
+	struct dm_hash_node *node;
+	struct vg_info *info;
+	const char *vgid;
+	char *entry;
+
+	buffer_append(buf, name);
+	buffer_append(buf, " {\n");
+
+	for (node = dm_hash_get_first(ht); node; node = dm_hash_get_next(ht, node)) {
+		vgid = dm_hash_get_key(ht, node);
+		info = dm_hash_get_data(ht, node);
+
+		buffer_append(buf, "    ");
+		(void) dm_asprintf(&entry, "%s = %lld", vgid, (long long)info->external_version);
+		if (entry)
+			buffer_append(buf, entry);
+		buffer_append(buf, "\n");
+		dm_free(entry);
+	}
+
+	buffer_append(buf, "}\n");
+}
+
+/*
+ * Append a "<name> { ... }" section to buf with one "<key> = <flags>"
+ * line (flags in hex) per entry of ht, whose values are struct vg_info.
+ * int_key is accepted but not used.
+ */
+static void _dump_info_flags(struct buffer *buf, struct dm_hash_table *ht, const char *name, int int_key)
+{
+	struct dm_hash_node *node;
+	struct vg_info *info;
+	const char *vgid;
+	char *entry;
+
+	buffer_append(buf, name);
+	buffer_append(buf, " {\n");
+
+	for (node = dm_hash_get_first(ht); node; node = dm_hash_get_next(ht, node)) {
+		vgid = dm_hash_get_key(ht, node);
+		info = dm_hash_get_data(ht, node);
+
+		buffer_append(buf, "    ");
+		(void) dm_asprintf(&entry, "%s = %llx", vgid, (long long)info->flags);
+		if (entry)
+			buffer_append(buf, entry);
+		buffer_append(buf, "\n");
+		dm_free(entry);
+	}
+
+	buffer_append(buf, "}\n");
+}
+
 static response dump(lvmetad_state *s)
 {
 	response res = { 0 };
@@ -1068,6 +1456,9 @@ static response dump(lvmetad_state *s)
 	buffer_append(b, "\n# VGID to VGNAME mapping\n\n");
 	_dump_pairs(b, s->vgid_to_vgname, "vgid_to_vgname", 0);

+	buffer_append(b, "\n# VGID to outdated PVs mapping\n\n");
+	_dump_cft(b, s->vgid_to_outdated_pvs, "outdated_pvs/vgid");
+
 	buffer_append(b, "\n# VGNAME to VGID mapping\n\n");
 	_dump_pairs(b, s->vgname_to_vgid, "vgname_to_vgid", 0);

@@ -1077,6 +1468,12 @@ static response dump(lvmetad_state *s)
 	buffer_append(b, "\n# DEVICE to PVID mapping\n\n");
 	_dump_pairs(b, s->device_to_pvid, "device_to_pvid", 1);

+	buffer_append(b, "\n# VGID to INFO version mapping\n\n");
+	_dump_info_version(b, s->vgid_to_info, "vgid_to_info", 0);
+
+	buffer_append(b, "\n# VGID to INFO flags mapping\n\n");
+	_dump_info_flags(b, s->vgid_to_info, "vgid_to_info", 0);
+
 	unlock_pvid_to_vgid(s);
 	unlock_pvid_to_pvmeta(s);
 	unlock_vgid_to_metadata(s);
@@ -1098,7 +1495,7 @@ static response handler(daemon_state s, client_handle h, request r)
 		return daemon_reply_simple("OK", NULL);
 	}

-	if (strcmp(token, state->token) && strcmp(rq, "dump")) {
+	if (strcmp(token, state->token) && strcmp(rq, "dump") && strcmp(token, "skip")) {
 		pthread_mutex_unlock(&state->token_lock);
 		return daemon_reply_simple("token_mismatch",
 					   "expected = %s", state->token,
@@ -1127,6 +1524,9 @@ static response handler(daemon_state s, client_handle h, request r)
 	if (!strcmp(rq, "vg_update"))
 		return vg_update(state, r);

+	if (!strcmp(rq, "vg_clear_outdated_pvs"))
+		return vg_clear_outdated_pvs(state, r);
+
 	if (!strcmp(rq, "vg_remove"))
 		return vg_remove(state, r);

@@ -1139,6 +1539,15 @@ static response handler(daemon_state s, client_handle h, request r)
 	if (!strcmp(rq, "vg_list"))
 		return vg_list(state, r);

+	if (!strcmp(rq, "set_global_info"))
+		return set_global_info(state, r);
+
+	if (!strcmp(rq, "get_global_info"))
+		return get_global_info(state, r);
+
+	if (!strcmp(rq, "set_vg_info"))
+		return set_vg_info(state, r);
+
 	if (!strcmp(rq, "dump"))
 		return dump(state);

--- a/daemons/lvmetad/testclient.c
+++ b/daemons/lvmetad/testclient.c
@@ -1,3 +1,18 @@
+/*
+ * Copyright (C) 2011-2014 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include "tool.h"
+
 #include "lvmetad-client.h"
 #include "label.h"
 #include "lvmcache.h"
@@ -105,15 +120,17 @@ void _dump_vg(daemon_handle h, const char *uuid)

 int main(int argc, char **argv) {
 	daemon_handle h = lvmetad_open();
+	/* FIXME Missing error path */

 	if (argc > 1) {
 		int i;
-		struct cmd_context *cmd = create_toolcontext(0, NULL, 0, 0);
+		struct cmd_context *cmd = create_toolcontext(0, NULL, 0, 0, 1, 1);
 		for (i = 1; i < argc; ++i) {
 			const char *uuid = NULL;
 			scan(h, argv[i]);
 		}
 		destroy_toolcontext(cmd);
+		/* FIXME Missing lvmetad_close() */
 		return 0;
 	}

@@ -122,6 +139,6 @@ int main(int argc, char **argv) {
 	_dump_vg(h, vgid);
 	_pv_add(h, uuid3, NULL);

-	daemon_close(h);
+	daemon_close(h);	/* FIXME lvmetad_close? */
 	return 0;
 }
--- a/daemons/lvmlockd/.gitignore
+++ b/daemons/lvmlockd/.gitignore
@@ -0,0 +1,2 @@
+lvmlockctl
+lvmlockd
--- a/daemons/lvmlockd/Makefile.in
+++ b/daemons/lvmlockd/Makefile.in
@@ -0,0 +1,66 @@
+#
+# Copyright (C) 2014-2015 Red Hat, Inc.
+#
+# This file is part of LVM2.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU Lesser General Public License v.2.1.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+top_builddir = @top_builddir@
+
+SOURCES = lvmlockd-core.c
+
+ifeq ("@BUILD_LOCKDSANLOCK@", "yes")
+  SOURCES += lvmlockd-sanlock.c
+endif
+
+ifeq ("@BUILD_LOCKDDLM@", "yes")
+  SOURCES += lvmlockd-dlm.c
+endif
+
+TARGETS = lvmlockd lvmlockctl
+
+# Both install targets are commands, not files.
+.PHONY: install_lvmlockd install_lvmlockctl
+
+include $(top_builddir)/make.tmpl
+
+INCLUDES += -I$(top_srcdir)/libdaemon/server
+LVMLIBS = -ldaemonserver $(LVMINTERNAL_LIBS) -ldevmapper
+
+LIBS += $(PTHREAD_LIBS)
+
+ifeq ("@BUILD_LOCKDSANLOCK@", "yes")
+  LIBS += -lsanlock_client
+endif
+
+ifeq ("@BUILD_LOCKDDLM@", "yes")
+  LIBS += -ldlm_lt
+endif
+
+LDFLAGS += -L$(top_builddir)/libdaemon/server
+CLDFLAGS += -L$(top_builddir)/libdaemon/server
+
+# Link the daemon from $(OBJECTS) with $(LVMLIBS) and $(LIBS).
+lvmlockd: $(OBJECTS) $(top_builddir)/libdaemon/client/libdaemonclient.a \
+		    $(top_builddir)/libdaemon/server/libdaemonserver.a
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJECTS) $(LVMLIBS) $(LIBS)
+
+# Link the control tool from lvmlockctl.o with $(LVMLIBS) only
+# ($(LIBS) is not on this link line).
+lvmlockctl: lvmlockctl.o $(top_builddir)/libdaemon/client/libdaemonclient.a \
+		    $(top_builddir)/libdaemon/server/libdaemonserver.a
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ lvmlockctl.o $(LVMLIBS)
+
+# $(<F) is the file name part of the first prerequisite, so each binary
+# is installed under its own name in $(sbindir).
+install_lvmlockd: lvmlockd
+	$(INSTALL_PROGRAM) -D $< $(sbindir)/$(<F)
+
+install_lvmlockctl: lvmlockctl
+	$(INSTALL_PROGRAM) -D $< $(sbindir)/$(<F)
+
+# Hook both binaries into the install_lvm2 and install targets.
+install_lvm2: install_lvmlockd install_lvmlockctl
+
+install: install_lvm2
--- a/daemons/lvmlockd/lvmlockctl.c
+++ b/daemons/lvmlockd/lvmlockctl.c
@@ -0,0 +1,745 @@
+/*
+ * Copyright (C) 2014-2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ */
+
+#include "tool.h"
+
+#include "lvmlockd-client.h"
+
+#include <stddef.h>
+#include <getopt.h>
+#include <signal.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <syslog.h>
+#include <sys/wait.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+static int quit = 0;
+static int info = 0;
+static int dump = 0;
+static int wait_opt = 0;
+static int force_opt = 0;
+static int kill_vg = 0;
+static int drop_vg = 0;
+static int gl_enable = 0;
+static int gl_disable = 0;
+static int stop_lockspaces = 0;
+static char *arg_vg_name = NULL;
+
+#define DUMP_SOCKET_NAME "lvmlockd-dump.sock"
+#define DUMP_BUF_SIZE (1024 * 1024)
+static char dump_buf[DUMP_BUF_SIZE+1];
+static int dump_len;
+static struct sockaddr_un dump_addr;
+static socklen_t dump_addrlen;
+
+daemon_handle _lvmlockd;
+
+#define log_error(fmt, args...) \
+do { \
+	printf(fmt "\n", ##args); \
+} while (0)
+
+#define MAX_LINE 512
+
+/* copied from lvmlockd-internal.h */
+#define MAX_NAME 64
+#define MAX_ARGS 64
+
+/*
+ * lvmlockd dumps the client info before the lockspaces,
+ * so we can look up client info when printing lockspace info.
+ */
+
+#define MAX_CLIENTS 100
+
+/* One saved "info=client" line; filled in by save_client_info(). */
+struct client_info {
+	uint32_t client_id;
+	int pid;
+	char name[MAX_NAME+1];
+};
+
+/* Clients saved so far; the first num_clients entries are in use. */
+static struct client_info clients[MAX_CLIENTS];
+static int num_clients;
+
+/*
+ * Parse an "info=client ..." line and append it to the clients[] table
+ * so later lockspace/lock lines can be resolved to a pid and name.
+ *
+ * Fields that fail to parse keep their zero defaults.  Lines arriving
+ * after the table is full (MAX_CLIENTS entries) are dropped.
+ */
+static void save_client_info(char *line)
+{
+	uint32_t pid = 0;
+	int fd = 0;
+	int pi = 0;
+	uint32_t client_id = 0;
+	char name[MAX_NAME+1] = { 0 };
+
+	/* clients[] is fixed-size: never write past its end */
+	if (num_clients >= MAX_CLIENTS)
+		return;
+
+	/* %64s: width must stay equal to MAX_NAME so name[] cannot overflow */
+	sscanf(line, "info=client pid=%u fd=%d pi=%d id=%u name=%64s",
+	       &pid, &fd, &pi, &client_id, name);
+
+	clients[num_clients].client_id = client_id;
+	clients[num_clients].pid = pid;
+	strcpy(clients[num_clients].name, name);
+	num_clients++;
+}
+
+/*
+ * Look up a client saved by save_client_info() by its id.
+ *
+ * On a match, *pid and cl_name are filled from the table entry.  When no
+ * entry matches, both outputs are left untouched.  cl_name is written
+ * with strcpy(), so it must hold MAX_NAME+1 bytes (the size of
+ * client_info.name).
+ */
+static void find_client_info(uint32_t client_id, uint32_t *pid, char *cl_name)
+{
+	int i;
+
+	for (i = 0; i < num_clients; i++) {
+		if (clients[i].client_id == client_id) {
+			*pid = clients[i].pid;
+			strcpy(cl_name, clients[i].name);
+			return;
+		}
+	}
+}
+
+static int first_ls = 1;
+
+/*
+ * Print the "VG ..." and "LS ..." lines for one "info=ls ..." dump line.
+ * A blank line separates successive lockspaces (tracked by first_ls).
+ * Fields that fail to parse are printed as empty strings.
+ */
+static void format_info_ls(char *line)
+{
+	char ls_name[MAX_NAME+1] = { 0 };
+	char vg_name[MAX_NAME+1] = { 0 };
+	char vg_uuid[MAX_NAME+1] = { 0 };
+	char vg_sysid[MAX_NAME+1] = { 0 };
+	char lock_args[MAX_ARGS+1] = { 0 };
+	char lock_type[MAX_NAME+1] = { 0 };
+
+	/* %64s widths must stay equal to MAX_NAME / MAX_ARGS to bound each copy */
+	sscanf(line, "info=ls ls_name=%64s vg_name=%64s vg_uuid=%64s vg_sysid=%64s vg_args=%64s lm_type=%64s",
+	       ls_name, vg_name, vg_uuid, vg_sysid, lock_args, lock_type);
+
+	if (!first_ls)
+		printf("\n");
+	first_ls = 0;
+
+	printf("VG %s lock_type=%s %s\n", vg_name, lock_type, vg_uuid);
+
+	printf("LS %s %s\n", lock_type, ls_name);
+}
+
+/*
+ * Print an "OP <op> pid <pid> (<name>)" line for one
+ * "info=ls_action ..." dump line.  pid and name come from the saved
+ * client table and stay 0 / empty when the client id is unknown.
+ */
+static void format_info_ls_action(char *line)
+{
+	uint32_t client_id = 0;
+	char flags[MAX_NAME+1] = { 0 };
+	char version[MAX_NAME+1] = { 0 };
+	char op[MAX_NAME+1] = { 0 };
+	uint32_t pid = 0;
+	char cl_name[MAX_NAME+1] = { 0 };
+
+	/* %64s widths must stay equal to MAX_NAME to bound each copy */
+	sscanf(line, "info=ls_action client_id=%u %64s %64s op=%64s",
+	       &client_id, flags, version, op);
+
+	find_client_info(client_id, &pid, cl_name);
+
+	printf("OP %s pid %u (%s)\n", op, pid, cl_name);
+}
+
+/*
+ * Handle one "info=r ..." (resource) dump line.
+ *
+ * When the resource mode is not "un", the name and type are copied to
+ * r_name_out / r_type_out for the lk and r_action lines that follow, and
+ * nothing is printed.  When the mode is "un" there are no lk lines, so
+ * the unlocked state is printed here and the outputs are not written.
+ *
+ * r_name_out and r_type_out are filled with strcpy(): they must hold
+ * MAX_NAME+1 and 4 bytes respectively.
+ */
+static void format_info_r(char *line, char *r_name_out, char *r_type_out)
+{
+	char r_name[MAX_NAME+1] = { 0 };
+	char r_type[4] = { 0 };
+	char mode[4] = { 0 };
+	char sh_count[MAX_NAME+1] = { 0 };
+	uint32_t ver = 0;
+
+	/* field widths bound every copy: 64 == MAX_NAME, 3 == sizeof(r_type)-1 */
+	sscanf(line, "info=r name=%64s type=%3s mode=%3s %64s version=%u",
+	       r_name, r_type, mode, sh_count, &ver);
+
+	/* when mode is not un, wait and print each lk line */
+
+	if (strcmp(mode, "un")) {
+		strcpy(r_name_out, r_name);
+		strcpy(r_type_out, r_type);
+		return;
+	}
+
+	/* when mode is un, there will be no lk lines, so print now */
+
+	if (!strcmp(r_type, "gl")) {
+		printf("LK GL un ver %u\n", ver);
+
+	} else if (!strcmp(r_type, "vg")) {
+		printf("LK VG un ver %u\n", ver);
+
+	} else if (!strcmp(r_type, "lv")) {
+		printf("LK LV un %s\n", r_name);
+	}
+}
+
+/*
+ * Print one held lock ("info=lk ..." line) for the resource described by
+ * r_name / r_type, which come from the preceding "info=r" line.
+ * If either is empty, an error and the raw line are printed instead.
+ */
+static void format_info_lk(char *line, char *r_name, char *r_type)
+{
+	char mode[4] = { 0 };
+	uint32_t ver = 0;
+	char flags[MAX_NAME+1] = { 0 };
+	uint32_t client_id = 0;
+	uint32_t pid = 0;
+	char cl_name[MAX_NAME+1] = { 0 };
+
+	if (!r_name[0] || !r_type[0]) {
+		printf("format_info_lk error r_name %s r_type %s\n", r_name, r_type);
+		printf("%s\n", line);
+		return;
+	}
+
+	/* field widths bound every copy: 3 == sizeof(mode)-1, 64 == MAX_NAME */
+	sscanf(line, "info=lk mode=%3s version=%u %64s client_id=%u",
+	       mode, &ver, flags, &client_id);
+
+	find_client_info(client_id, &pid, cl_name);
+
+	if (!strcmp(r_type, "gl")) {
+		printf("LK GL %s ver %u pid %u (%s)\n", mode, ver, pid, cl_name);
+
+	} else if (!strcmp(r_type, "vg")) {
+		printf("LK VG %s ver %u pid %u (%s)\n", mode, ver, pid, cl_name);
+
+	} else if (!strcmp(r_type, "lv")) {
+		printf("LK LV %s %s\n", mode, r_name);
+	}
+}
+
+/*
+ * Print one pending resource action ("info=r_action ..." line) for the
+ * resource described by r_name / r_type, which come from the preceding
+ * "info=r" line.  If either is empty, an error and the raw line are
+ * printed instead.
+ *
+ * Actions other than "lock" are printed as "OP ..." lines; lock actions
+ * are printed as waiting locks ("LW ...").
+ */
+static void format_info_r_action(char *line, char *r_name, char *r_type)
+{
+	uint32_t client_id = 0;
+	char flags[MAX_NAME+1] = { 0 };
+	char version[MAX_NAME+1] = { 0 };
+	char op[MAX_NAME+1] = { 0 };
+	char rt[4] = { 0 };
+	char mode[4] = { 0 };
+	char lm[MAX_NAME+1] = { 0 };
+	char result[MAX_NAME+1] = { 0 };
+	char lm_rv[MAX_NAME+1] = { 0 };
+	uint32_t pid = 0;
+	char cl_name[MAX_NAME+1] = { 0 };
+
+	if (!r_name[0] || !r_type[0]) {
+		printf("format_info_r_action error r_name %s r_type %s\n", r_name, r_type);
+		printf("%s\n", line);
+		return;
+	}
+
+	/* field widths bound every copy: 64 == MAX_NAME, 3 == sizeof(rt)-1 */
+	sscanf(line, "info=r_action client_id=%u %64s %64s op=%64s rt=%3s mode=%3s %64s %64s %64s",
+	       &client_id, flags, version, op, rt, mode, lm, result, lm_rv);
+
+	find_client_info(client_id, &pid, cl_name);
+
+	if (strcmp(op, "lock")) {
+		/* newline-terminated like every other record line */
+		printf("OP %s pid %u (%s)\n", op, pid, cl_name);
+		return;
+	}
+
+	if (!strcmp(r_type, "gl")) {
+		printf("LW GL %s ver %u pid %u (%s)\n", mode, 0, pid, cl_name);
+
+	} else if (!strcmp(r_type, "vg")) {
+		printf("LW VG %s ver %u pid %u (%s)\n", mode, 0, pid, cl_name);
+
+	} else if (!strcmp(r_type, "lv")) {
+		printf("LW LV %s %s\n", mode, r_name);
+	}
+}
+
+/*
+ * Dispatch one line of the info dump to its formatter, chosen by the
+ * line's "info=<kind> " prefix.  Unrecognised lines are echoed with a
+ * "UN " prefix.
+ *
+ * r_name / r_type carry the current resource between calls: they are
+ * cleared (MAX_NAME+1 bytes each) and refilled on every "info=r" line
+ * and read by the lk and r_action lines that follow it.
+ */
+static void format_info_line(char *line, char *r_name, char *r_type)
+{
+	if (!strncmp(line, "info=structs ", strlen("info=structs "))) {
+		/* only print this in the raw info dump */
+		return;
+	}
+
+	if (!strncmp(line, "info=client ", strlen("info=client "))) {
+		save_client_info(line);
+		return;
+	}
+
+	if (!strncmp(line, "info=ls ", strlen("info=ls "))) {
+		format_info_ls(line);
+		return;
+	}
+
+	if (!strncmp(line, "info=ls_action ", strlen("info=ls_action "))) {
+		format_info_ls_action(line);
+		return;
+	}
+
+	if (!strncmp(line, "info=r ", strlen("info=r "))) {
+		/* a new resource starts: forget the previous one first */
+		memset(r_name, 0, MAX_NAME+1);
+		memset(r_type, 0, MAX_NAME+1);
+		format_info_r(line, r_name, r_type);
+		return;
+	}
+
+	if (!strncmp(line, "info=lk ", strlen("info=lk "))) {
+		/* will use info from previous r */
+		format_info_lk(line, r_name, r_type);
+		return;
+	}
+
+	if (!strncmp(line, "info=r_action ", strlen("info=r_action "))) {
+		/* will use info from previous r */
+		format_info_r_action(line, r_name, r_type);
+		return;
+	}
+
+	printf("UN %s\n", line);
+}
+
+/*
+ * Split the received dump (dump_buf, dump_len bytes) into lines and hand
+ * each one to format_info_line().  A line ends at '\n' or '\0'.
+ *
+ * A line that would not fit in the local buffer is flushed early in
+ * MAX_LINE-1 byte pieces instead of overrunning it.  Bytes after the
+ * last terminator are not formatted.
+ */
+static void format_info(void)
+{
+	char line[MAX_LINE];
+	char r_name[MAX_NAME+1];
+	char r_type[MAX_NAME+1];
+	int i, j;
+
+	j = 0;
+	memset(line, 0, sizeof(line));
+	/* lk/r_action lines test r_name[0]/r_type[0] before any "info=r" line */
+	memset(r_name, 0, sizeof(r_name));
+	memset(r_type, 0, sizeof(r_type));
+
+	for (i = 0; i < dump_len; i++) {
+		line[j++] = dump_buf[i];
+
+		/* j == MAX_LINE-1: buffer full, last byte is the kept terminator */
+		if ((line[j-1] == '\n') || (line[j-1] == '\0') || (j == MAX_LINE - 1)) {
+			format_info_line(line, r_name, r_type);
+			j = 0;
+			memset(line, 0, sizeof(line));
+		}
+	}
+}
+
+
+/*
+ * Build a request named req_name, extend it with the variadic arguments
+ * (handed to daemon_request_extend_v), send it over the global _lvmlockd
+ * handle and return the reply.
+ *
+ * The request is destroyed before returning.  The reply is returned to
+ * the caller; callers in this file release it with
+ * daemon_reply_destroy().
+ */
+static daemon_reply _lvmlockd_send(const char *req_name, ...)
+{
+	va_list ap;
+	daemon_reply repl;
+	daemon_request req;
+
+	req = daemon_request_make(req_name);
+
+	va_start(ap, req_name);
+	daemon_request_extend_v(req, ap);
+	va_end(ap);
+
+	repl = daemon_send(_lvmlockd, req);
+
+	daemon_request_destroy(req);
+
+	return repl;
+}
+
+/* See the same in lib/locking/lvmlockd.c */
+#define NO_LOCKD_RESULT -1000
+
+/*
+ * Extract the operation result from an lvmlockd reply.
+ *
+ * Returns 1 and stores the reply's "op_result" in *result on success.
+ * Returns 0, leaving *result untouched, when the reply carries an error,
+ * its "response" is not "OK", or it has no "op_result" field.
+ */
+static int _lvmlockd_result(daemon_reply reply, int *result)
+{
+	int reply_result;
+
+	if (reply.error) {
+		log_error("lvmlockd_result reply error %d", reply.error);
+		return 0;
+	}
+
+	if (strcmp(daemon_reply_str(reply, "response", ""), "OK")) {
+		log_error("lvmlockd_result bad response");
+		return 0;
+	}
+
+	/* NO_LOCKD_RESULT doubles as the "field missing" default */
+	reply_result = daemon_reply_int(reply, "op_result", NO_LOCKD_RESULT);
+	if (reply_result == NO_LOCKD_RESULT) {
+		log_error("lvmlockd_result no op_result");
+		return 0;
+	}
+
+	*result = reply_result;
+
+	return 1;
+}
+
+/*
+ * Send "quit" to lvmlockd.
+ * Returns 0 on success or the reply's error value, which is also logged.
+ */
+static int do_quit(void)
+{
+	daemon_reply reply = daemon_send_simple(_lvmlockd, "quit", NULL);
+	int rv = 0;
+
+	if (reply.error) {
+		rv = reply.error;
+		log_error("reply error %d", reply.error);
+	}
+
+	daemon_reply_destroy(reply);
+
+	return rv;
+}
+
+/*
+ * Create and bind the local datagram socket on which the dump is
+ * received.
+ *
+ * The address is built with sun_path[0] left as '\0' and
+ * DUMP_SOCKET_NAME copied in from sun_path[1]; it is kept in the globals
+ * dump_addr / dump_addrlen.
+ *
+ * Returns the socket fd, or a negative value if socket() or bind()
+ * fails.
+ */
+static int setup_dump_socket(void)
+{
+	int s, rv;
+
+	s = socket(AF_LOCAL, SOCK_DGRAM, 0);
+	if (s < 0)
+		return s;
+
+	memset(&dump_addr, 0, sizeof(dump_addr));
+	dump_addr.sun_family = AF_LOCAL;
+	strcpy(&dump_addr.sun_path[1], DUMP_SOCKET_NAME);
+	dump_addrlen = sizeof(sa_family_t) + strlen(dump_addr.sun_path+1) + 1;
+
+	rv = bind(s, (struct sockaddr *) &dump_addr, dump_addrlen);
+	if (rv < 0) {
+		/* close() returns 0 on success; only non-zero is a failure */
+		if (close(s))
+			log_error("failed to close dump socket");
+		return rv;
+	}
+
+	return s;
+}
+
+/*
+ * Ask lvmlockd for a dump (req_name is "info" or "dump"), receive it on
+ * the dump socket and print it.
+ *
+ * The reply announces the size in "dump_len"; the data itself arrives as
+ * a datagram on the socket from setup_dump_socket().  The buffer is
+ * printed raw for "dump" requests (or when both --info and --dump were
+ * given) and through format_info() otherwise.
+ *
+ * Returns 0 on success, the socket/reply error, a negative "result"
+ * reported by lvmlockd, or -errno if recvfrom() fails.
+ */
+static int do_dump(const char *req_name)
+{
+	daemon_reply reply;
+	int result;
+	int fd, rv = 0;
+	ssize_t n;
+
+	fd = setup_dump_socket();
+	if (fd < 0) {
+		log_error("socket error %d", fd);
+		return fd;
+	}
+
+	reply = daemon_send_simple(_lvmlockd, req_name, NULL);
+
+	if (reply.error) {
+		log_error("reply error %d", reply.error);
+		rv = reply.error;
+		/* release the reply on this path too, as do_quit() does */
+		daemon_reply_destroy(reply);
+		goto out;
+	}
+
+	result = daemon_reply_int(reply, "result", 0);
+	dump_len = daemon_reply_int(reply, "dump_len", 0);
+
+	daemon_reply_destroy(reply);
+
+	if (result < 0) {
+		rv = result;
+		log_error("result %d", result);
+	}
+
+	/* dump_len comes from the peer: never trust it beyond our buffer */
+	if (dump_len <= 0) {
+		dump_len = 0;
+		goto out;
+	}
+	if (dump_len > DUMP_BUF_SIZE)
+		dump_len = DUMP_BUF_SIZE;
+
+	memset(dump_buf, 0, sizeof(dump_buf));
+
+	/* use a separate variable so a negative result in rv is not lost */
+	n = recvfrom(fd, dump_buf, dump_len, MSG_WAITALL,
+		     (struct sockaddr *)&dump_addr, &dump_addrlen);
+	if (n < 0) {
+		/* save errno before logging, which may change it */
+		rv = -errno;
+		log_error("recvfrom error %d %d", (int) n, -rv);
+		goto out;
+	}
+
+	if ((info && dump) || !strcmp(req_name, "dump"))
+		printf("%s\n", dump_buf);
+	else
+		format_info();
+out:
+	if (close(fd))
+		log_error("failed to close dump socket %d", fd);
+	return rv;
+}
+
+/*
+ * Send an enable/disable request (req_name, e.g. "enable_gl" or
+ * "disable_gl") for the VG named in arg_vg_name.
+ *
+ * Returns 0 when lvmlockd answered with a usable result, or
+ * NO_LOCKD_RESULT when no result could be extracted from the reply.
+ */
+static int do_able(const char *req_name)
+{
+	daemon_reply reply;
+	/* defined value for the failure path, where nothing is stored */
+	int result = NO_LOCKD_RESULT;
+	int rv;
+
+	reply = _lvmlockd_send(req_name,
+				"cmd = %s", "lvmlockctl",
+				"pid = %d", getpid(),
+				"vg_name = %s", arg_vg_name,
+				NULL);
+
+	if (!_lvmlockd_result(reply, &result)) {
+		log_error("lvmlockd result %d", result);
+		rv = result;
+	} else {
+		rv = 0;
+	}
+
+	daemon_reply_destroy(reply);
+	return rv;
+}
+
+/*
+ * Send "stop_all" to lvmlockd.  The "opts" field carries "wait " and/or
+ * "force " according to wait_opt / force_opt, or "none" when neither is
+ * set.
+ *
+ * Returns 0 when lvmlockd answered with a usable result, or
+ * NO_LOCKD_RESULT when no result could be extracted from the reply.
+ */
+static int do_stop_lockspaces(void)
+{
+	daemon_reply reply;
+	char opts[32];
+	/* defined value for the failure path, where nothing is stored */
+	int result = NO_LOCKD_RESULT;
+	int rv;
+
+	memset(opts, 0, sizeof(opts));
+
+	if (wait_opt)
+		strcat(opts, "wait ");
+	if (force_opt)
+		strcat(opts, "force ");
+
+	reply = _lvmlockd_send("stop_all",
+				"cmd = %s", "lvmlockctl",
+				"pid = %d", getpid(),
+				"opts = %s", opts[0] ? opts : "none",
+				NULL);
+
+	if (!_lvmlockd_result(reply, &result)) {
+		log_error("lvmlockd result %d", result);
+		rv = result;
+	} else {
+		rv = 0;
+	}
+
+	daemon_reply_destroy(reply);
+	return rv;
+}
+
+/*
+ * Handle --kill: log emergency messages telling the admin to deactivate
+ * the LVs of arg_vg_name, then notify lvmlockd with "kill_vg".
+ *
+ * Returns 0 when lvmlockd answered with a usable result, or
+ * NO_LOCKD_RESULT when no result could be extracted from the reply.
+ */
+static int do_kill(void)
+{
+	daemon_reply reply;
+	/* defined value for the failure path, where nothing is stored */
+	int result = NO_LOCKD_RESULT;
+	int rv;
+
+	syslog(LOG_EMERG, "Lost access to sanlock lease storage in VG %s.", arg_vg_name);
+	/* These two lines explain the manual alternative to the FIXME below. */
+	syslog(LOG_EMERG, "Immediately deactivate LVs in VG %s.", arg_vg_name);
+	syslog(LOG_EMERG, "Once VG is unused, run lvmlockctl --drop %s.", arg_vg_name);
+
+	/*
+	 * It may not be strictly necessary to notify lvmlockd of the kill, but
+	 * lvmlockd can use this information to avoid attempting any new lock
+	 * requests in the VG (which would fail anyway), and can return an
+	 * error indicating that the VG has been killed.
+	 */
+
+	reply = _lvmlockd_send("kill_vg",
+				"cmd = %s", "lvmlockctl",
+				"pid = %d", getpid(),
+				"vg_name = %s", arg_vg_name,
+				NULL);
+
+	if (!_lvmlockd_result(reply, &result)) {
+		log_error("lvmlockd result %d", result);
+		rv = result;
+	} else {
+		rv = 0;
+	}
+
+	daemon_reply_destroy(reply);
+
+	/*
+	 * FIXME: here is where we should implement a strong form of
+	 * blkdeactivate, and if it completes successfully, automatically call
+	 * do_drop() afterward.  (The drop step may not always be necessary
+	 * if the lvm commands run while shutting things down release all the
+	 * leases.)
+	 *
+	 * run_strong_blkdeactivate();
+	 * do_drop();
+	 */
+
+	return rv;
+}
+
+/*
+ * Handle --drop: log a warning and send "drop_vg" for arg_vg_name.
+ *
+ * Returns 0 when lvmlockd answered with a usable result, or
+ * NO_LOCKD_RESULT when no result could be extracted from the reply.
+ */
+static int do_drop(void)
+{
+	daemon_reply reply;
+	/* defined value for the failure path, where nothing is stored */
+	int result = NO_LOCKD_RESULT;
+	int rv;
+
+	syslog(LOG_WARNING, "Dropping locks for VG %s.", arg_vg_name);
+
+	/*
+	 * Check for misuse by looking for any active LVs in the VG
+	 * and refusing this operation if found?  One possible way
+	 * to kill LVs (e.g. if fs cannot be unmounted) is to suspend
+	 * them, or replace them with the error target.  In that
+	 * case the LV will still appear to be active, but it is
+	 * safe to release the lock.
+	 */
+
+	reply = _lvmlockd_send("drop_vg",
+				"cmd = %s", "lvmlockctl",
+				"pid = %d", getpid(),
+				"vg_name = %s", arg_vg_name,
+				NULL);
+
+	if (!_lvmlockd_result(reply, &result)) {
+		log_error("lvmlockd result %d", result);
+		rv = result;
+	} else {
+		rv = 0;
+	}
+
+	daemon_reply_destroy(reply);
+	return rv;
+}
+
+/* Print the command line help text to stdout. */
+static void print_usage(void)
+{
+	printf("lvmlockctl options\n");
+	printf("Options:\n");
+	printf("--help | -h\n");
+	printf("      Show this help information.\n");
+	printf("--quit | -q\n");
+	printf("      Tell lvmlockd to quit.\n");
+	printf("--info | -i\n");
+	printf("      Print lock state information from lvmlockd.\n");
+	printf("--dump | -d\n");
+	printf("      Print log buffer from lvmlockd.\n");
+	printf("--wait | -w 0|1\n");
+	printf("      Wait option for other commands.\n");
+	printf("--force | -f 0|1\n");
+	printf("      Force option for other commands.\n");
+	printf("--kill | -k <vg_name>\n");
+	printf("      Kill access to the vg when sanlock cannot renew lease.\n");
+	printf("--drop | -r <vg_name>\n");
+	printf("      Clear locks for the vg after it has been killed and is no longer used.\n");
+	printf("--gl-enable <vg_name>\n");
+	printf("      Tell lvmlockd to enable the global lock in a sanlock vg.\n");
+	printf("--gl-disable <vg_name>\n");
+	printf("      Tell lvmlockd to disable the global lock in a sanlock vg.\n");
+	printf("--stop-lockspaces | -S\n");
+	printf("      Stop all lockspaces.\n");
+}
+
+/*
+ * Parse the command line into the file-level option flags.
+ *
+ * With no arguments, or with --help, the usage text is printed and the
+ * program exits 0.  An unknown option prints the usage and exits 1.
+ * Options taking a VG name store a strdup() of it in arg_vg_name.
+ *
+ * Always returns 0.
+ */
+static int read_options(int argc, char *argv[])
+{
+	int option_index = 0;
+	int c;
+
+	static struct option long_options[] = {
+		{"help",            no_argument,       0,  'h' },
+		{"quit",            no_argument,       0,  'q' },
+		{"info",            no_argument,       0,  'i' },
+		{"dump",            no_argument,       0,  'd' },
+		{"wait",            required_argument, 0,  'w' },
+		{"force",           required_argument, 0,  'f' },
+		{"kill",            required_argument, 0,  'k' },
+		{"drop",            required_argument, 0,  'r' },
+		{"gl-enable",       required_argument, 0,  'E' },
+		{"gl-disable",      required_argument, 0,  'D' },
+		{"stop-lockspaces", no_argument,       0,  'S' },
+		{0, 0, 0, 0 }
+	};
+
+	if (argc == 1) {
+		print_usage();
+		exit(0);
+	}
+
+	while (1) {
+		/* "f:" matches the --force entry above and the usage text */
+		c = getopt_long(argc, argv, "hqidE:D:w:f:k:r:S", long_options, &option_index);
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 'h':
+			/* --help */
+			print_usage();
+			exit(0);
+		case 'q':
+			/* --quit */
+			quit = 1;
+			break;
+		case 'i':
+			/* --info */
+			info = 1;
+			break;
+		case 'd':
+			/* --dump */
+			dump = 1;
+			break;
+		case 'w':
+			/* --wait */
+			wait_opt = atoi(optarg);
+			break;
+		case 'f':
+			/* --force */
+			force_opt = atoi(optarg);
+			break;
+		case 'k':
+			/* --kill */
+			kill_vg = 1;
+			arg_vg_name = strdup(optarg);
+			break;
+		case 'r':
+			/* --drop */
+			drop_vg = 1;
+			arg_vg_name = strdup(optarg);
+			break;
+		case 'E':
+			/* --gl-enable */
+			gl_enable = 1;
+			arg_vg_name = strdup(optarg);
+			break;
+		case 'D':
+			/* --gl-disable */
+			gl_disable = 1;
+			arg_vg_name = strdup(optarg);
+			break;
+		case 'S':
+			/* --stop-lockspaces */
+			stop_lockspaces = 1;
+			break;
+		default:
+			print_usage();
+			exit(1);
+		}
+	}
+
+
+	return 0;
+}
+
+/*
+ * Entry point: parse options, connect to lvmlockd and run the first
+ * requested action, in this fixed order of precedence: quit, info,
+ * dump, kill, drop, gl-enable, gl-disable, stop-lockspaces.
+ *
+ * Returns the action's result, or -1 if the connection fails.
+ */
+int main(int argc, char **argv)
+{
+	int rv = 0;
+
+	rv = read_options(argc, argv);
+	if (rv < 0)
+		return rv;
+
+	_lvmlockd = lvmlockd_open(NULL);
+
+	if (_lvmlockd.socket_fd < 0 || _lvmlockd.error) {
+		log_error("Cannot connect to lvmlockd.");
+		return -1;
+	}
+
+	/* only the first matching action runs */
+	if (quit)
+		rv = do_quit();
+	else if (info)
+		rv = do_dump("info");
+	else if (dump)
+		rv = do_dump("dump");
+	else if (kill_vg)
+		rv = do_kill();
+	else if (drop_vg)
+		rv = do_drop();
+	else if (gl_enable)
+		rv = do_able("enable_gl");
+	else if (gl_disable)
+		rv = do_able("disable_gl");
+	else if (stop_lockspaces)
+		rv = do_stop_lockspaces();
+
+	lvmlockd_close(_lvmlockd);
+	return rv;
+}
--- a/daemons/lvmlockd/lvmlockd-client.h
+++ b/daemons/lvmlockd/lvmlockd-client.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2014-2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ */
+
+#ifndef _LVM_LVMLOCKD_CLIENT_H
+#define _LVM_LVMLOCKD_CLIENT_H
+
+#include "daemon-client.h"
+
+#define LVMLOCKD_SOCKET DEFAULT_RUN_DIR "/lvmlockd.socket"
+
+/* Wrappers to open/close connection */
+
+/*
+ * Open a connection to lvmlockd.
+ *
+ * sock is the socket path; NULL selects LVMLOCKD_SOCKET.  The
+ * daemon_info passed to daemon_open() sets autostart to 0.
+ * Returns the handle produced by daemon_open().
+ */
+static inline daemon_handle lvmlockd_open(const char *sock)
+{
+	daemon_info lvmlockd_info = {
+		.path = "lvmlockd",
+		.socket = sock ?: LVMLOCKD_SOCKET,
+		.protocol = "lvmlockd",
+		.protocol_version = 1,
+		.autostart = 0
+	};
+
+	return daemon_open(lvmlockd_info);
+}
+
+/* Close a connection opened with lvmlockd_open(). */
+static inline void lvmlockd_close(daemon_handle h)
+{
+	/* no "return <expr>;" here: ISO C forbids it in a void function */
+	daemon_close(h);
+}
+
+/*
+ * Errors returned as the lvmlockd result value.
+ */
+#define ENOLS     210 /* lockspace not found */
+#define ESTARTING 211 /* lockspace is starting */
+#define EARGS     212
+#define EHOSTID   213
+#define EMANAGER  214
+#define EPREPARE  215
+#define ELOCKD    216
+#define EVGKILLED 217 /* sanlock lost access to leases and VG is killed. */
+#define ELOCKIO   218 /* sanlock io errors during lock op, may be transient. */
+
+#endif	/* _LVM_LVMLOCKD_CLIENT_H */
--- a/daemons/lvmlockd/lvmlockd-core.c
+++ b/daemons/lvmlockd/lvmlockd-core.c
--- a/daemons/lvmlockd/lvmlockd-dlm.c
+++ b/daemons/lvmlockd/lvmlockd-dlm.c
@@ -0,0 +1,662 @@
+/*
+ * Copyright (C) 2014-2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ */
+
+#define _XOPEN_SOURCE 500  /* pthread */
+#define _ISOC99_SOURCE
+
+#include "tool.h"
+
+#include "daemon-server.h"
+#include "daemon-log.h"
+#include "xlate.h"
+
+#include "lvmlockd-internal.h"
+#include "lvmlockd-client.h"
+
+/*
+ * Using synchronous _wait dlm apis so do not define _REENTRANT and
+ * link with non-threaded version of library, libdlm_lt.
+ */
+#include "libdlm.h"
+
+#include <pthread.h>
+#include <stddef.h>
+#include <poll.h>
+#include <errno.h>
+#include <endian.h>
+#include <fcntl.h>
+#include <byteswap.h>
+#include <syslog.h>
+#include <dirent.h>
+#include <sys/socket.h>
+
+/* Per-lockspace dlm data, stored in lockspace.lm_data. */
+struct lm_dlm {
+	dlm_lshandle_t *dh;	/* set from dlm_new_lockspace() or dlm_open_lockspace() */
+};
+
+/* Per-resource dlm data, stored in resource.lm_data. */
+struct rd_dlm {
+	struct dlm_lksb lksb;	/* dlm lock status block; sb_lvbptr points just past *vb */
+	struct val_blk *vb;	/* local value block copy, allocated for GL/VG resources only */
+};
+
+/* Size in bytes of the dlm-specific data kept in resource.lm_data. */
+int lm_data_size_dlm(void)
+{
+	const size_t rd_size = sizeof(struct rd_dlm);
+
+	return rd_size;
+}
+
+/*
+ * lock_args format
+ *
+ * vg_lock_args format for dlm is
+ * vg_version_string:undefined:cluster_name
+ *
+ * lv_lock_args are not used for dlm
+ *
+ * version_string is MAJOR.MINOR.PATCH
+ * undefined may contain ":"
+ */
+
+#define VG_LOCK_ARGS_MAJOR 1
+#define VG_LOCK_ARGS_MINOR 0
+#define VG_LOCK_ARGS_PATCH 0
+
+/*
+ * Copy the cluster name out of vg_args into clustername.
+ * Per the lock_args format the cluster name is the last field;
+ * the return value is that of last_string_from_args().
+ */
+static int cluster_name_from_args(char *vg_args, char *clustername)
+{
+	int rv = last_string_from_args(vg_args, clustername);
+
+	return rv;
+}
+
+/*
+ * Check that the version recorded in vg_args can be handled.
+ *
+ * Returns 0 when the major version is not newer than VG_LOCK_ARGS_MAJOR,
+ * the negative result of version_from_args() when the version cannot be
+ * parsed, and -1 when the major version is too new.
+ */
+static int check_args_version(char *vg_args)
+{
+	unsigned int major = 0;
+	int rv;
+
+	rv = version_from_args(vg_args, &major, NULL, NULL);
+	if (rv < 0) {
+		log_error("check_args_version %s error %d", vg_args, rv);
+		return rv;
+	}
+
+	if (major > VG_LOCK_ARGS_MAJOR) {
+		/* major is unsigned, so print both values with %u */
+		log_error("check_args_version %s major %u %u", vg_args,
+			  major, (unsigned int) VG_LOCK_ARGS_MAJOR);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* This will be set after dlm_controld is started. */
+#define DLM_CLUSTER_NAME_PATH "/sys/kernel/config/dlm/cluster/cluster_name"
+
+/*
+ * Read the dlm cluster name from DLM_CLUSTER_NAME_PATH into clustername.
+ *
+ * clustername must have room for MAX_ARGS+1 bytes (all callers in this
+ * file pass such a buffer).  The result is NUL terminated and a trailing
+ * newline is stripped.  In daemon_test mode the name "test" is returned
+ * without touching sysfs.
+ *
+ * Returns 0 on success, or the negative result of open()/read() on error.
+ */
+static int read_cluster_name(char *clustername)
+{
+	char *n;
+	int fd;
+	int rv;
+	int read_errno;
+
+	if (daemon_test) {
+		sprintf(clustername, "%s", "test");
+		return 0;
+	}
+
+	fd = open(DLM_CLUSTER_NAME_PATH, O_RDONLY);
+	if (fd < 0) {
+		/* report errno: fd itself is always -1 here */
+		log_debug("read_cluster_name: open error %d, check dlm_controld", errno);
+		return fd;
+	}
+
+	rv = read(fd, clustername, MAX_ARGS);
+	read_errno = errno;	/* close() below may overwrite errno */
+
+	if (close(fd))
+		log_error("read_cluster_name: close_error %d", errno);
+
+	if (rv < 0) {
+		log_error("read_cluster_name: cluster name read error %d, check dlm_controld", read_errno);
+		return rv;
+	}
+
+	/* rv <= MAX_ARGS, so this stays within the MAX_ARGS+1 byte buffer */
+	clustername[rv] = '\0';
+
+	n = strstr(clustername, "\n");
+	if (n)
+		*n = '\0';
+
+	return 0;
+}
+
+/*
+ * Build the vg lock args for a new dlm VG: "<version>:<cluster name>".
+ *
+ * The result is written to vg_args.  Returns 0 on success, -EMANAGER when
+ * the cluster name cannot be read and -EARGS when the string would not fit.
+ */
+int lm_init_vg_dlm(char *ls_name, char *vg_name, uint32_t flags, char *vg_args)
+{
+	char version_str[MAX_ARGS+1] = { 0 };
+	char cluster[MAX_ARGS+1] = { 0 };
+	size_t needed;
+
+	snprintf(version_str, MAX_ARGS, "%u.%u.%u",
+		 VG_LOCK_ARGS_MAJOR, VG_LOCK_ARGS_MINOR, VG_LOCK_ARGS_PATCH);
+
+	if (read_cluster_name(cluster) < 0)
+		return -EMANAGER;
+
+	needed = strlen(cluster) + strlen(version_str) + 2;
+	if (needed > MAX_ARGS) {
+		log_error("init_vg_dlm args too long");
+		return -EARGS;
+	}
+
+	snprintf(vg_args, MAX_ARGS, "%s:%s", version_str, cluster);
+
+	log_debug("init_vg_dlm done %s vg_args %s", ls_name, vg_args);
+	return 0;
+}
+
+/*
+ * Validate the lockspace's vg args against the running cluster and
+ * allocate the dlm-specific lockspace data (ls->lm_data).
+ *
+ * Returns 0 on success, -EMANAGER when the cluster name cannot be read,
+ * -EARGS when the args are unusable or name a different cluster, and
+ * -ENOMEM on allocation failure.
+ */
+int lm_prepare_lockspace_dlm(struct lockspace *ls)
+{
+	char sys_cluster[MAX_ARGS+1] = { 0 };
+	char arg_cluster[MAX_ARGS+1] = { 0 };
+	struct lm_dlm *lmd;
+
+	if (read_cluster_name(sys_cluster) < 0)
+		return -EMANAGER;
+
+	/* global lockspace has no vg args, so there is nothing to verify */
+	if (ls->vg_args[0]) {
+		if (check_args_version(ls->vg_args) < 0)
+			return -EARGS;
+
+		if (cluster_name_from_args(ls->vg_args, arg_cluster) < 0) {
+			log_error("prepare_lockspace_dlm %s no cluster name from args %s", ls->name, ls->vg_args);
+			return -EARGS;
+		}
+
+		if (strcmp(sys_cluster, arg_cluster) != 0) {
+			log_error("prepare_lockspace_dlm %s mismatching cluster names sys %s arg %s",
+				  ls->name, sys_cluster, arg_cluster);
+			return -EARGS;
+		}
+	}
+
+	lmd = malloc(sizeof(*lmd));
+	if (lmd == NULL)
+		return -ENOMEM;
+
+	ls->lm_data = lmd;
+	return 0;
+}
+
+/*
+ * Join the dlm lockspace named ls->name.
+ *
+ * With adopt set an existing lockspace is opened, otherwise a new one is
+ * created exclusively.  On failure ls->lm_data is freed and cleared and
+ * -1 is returned.  Nothing is done in daemon_test mode.
+ */
+int lm_add_lockspace_dlm(struct lockspace *ls, int adopt)
+{
+	struct lm_dlm *lmd = (struct lm_dlm *)ls->lm_data;
+
+	if (daemon_test)
+		return 0;
+
+	lmd->dh = adopt ? dlm_open_lockspace(ls->name)
+			: dlm_new_lockspace(ls->name, 0600, DLM_LSFL_NEWEXCL);
+
+	if (lmd->dh)
+		return 0;
+
+	log_error("add_lockspace_dlm %s adopt %d error", ls->name, adopt);
+	free(lmd);
+	ls->lm_data = NULL;
+	return -1;
+}
+
+/*
+ * Leave the dlm lockspace and release ls->lm_data.
+ *
+ * Returns 0 on success or the negative dlm_release_lockspace() result,
+ * in which case ls->lm_data is left in place.  free_vg is currently
+ * unused (see the comment in the body).
+ */
+int lm_rem_lockspace_dlm(struct lockspace *ls, int free_vg)
+{
+	struct lm_dlm *lmd = (struct lm_dlm *)ls->lm_data;
+
+	if (!daemon_test) {
+		/*
+		 * If free_vg is set, it means we are doing vgremove, and we
+		 * may want to tell any other nodes to leave the lockspace.
+		 * This is not really necessary since there should be no harm
+		 * in having an unused lockspace sitting around.  A new
+		 * "notification lock" would need to be added with a callback
+		 * to signal this.
+		 */
+		int rv = dlm_release_lockspace(ls->name, lmd->dh, 1);
+
+		if (rv < 0) {
+			log_error("rem_lockspace_dlm error %d", rv);
+			return rv;
+		}
+	}
+
+	free(lmd);
+	ls->lm_data = NULL;
+
+	/* the global lockspace is gone once its name matches */
+	if (strcmp(ls->name, gl_lsname_dlm) == 0)
+		gl_running_dlm = 0;
+
+	return 0;
+}
+
+/*
+ * Initialise the dlm data of a resource.
+ *
+ * For GL and VG resources a single buffer is allocated holding the local
+ * struct val_blk followed by the DLM_LVB_LEN bytes used as the dlm lvb.
+ * When with_lock_nl is set (and not in daemon_test mode) a new NL lock is
+ * also requested on the resource.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or the negative
+ * dlm_ls_lock_wait() result.  On lock failure the buffer allocated by
+ * this call is released again, so a failed call leaves nothing behind
+ * for a later retry to leak.
+ */
+static int lm_add_resource_dlm(struct lockspace *ls, struct resource *r, int with_lock_nl)
+{
+	struct lm_dlm *lmd = (struct lm_dlm *)ls->lm_data;
+	struct rd_dlm *rdd = (struct rd_dlm *)r->lm_data;
+	uint32_t flags = 0;
+	char *buf = NULL;
+	int rv;
+
+	if (r->type == LD_RT_GL || r->type == LD_RT_VG) {
+		buf = calloc(1, sizeof(struct val_blk) + DLM_LVB_LEN);
+		if (!buf)
+			return -ENOMEM;
+
+		rdd->vb = (struct val_blk *)buf;
+		rdd->lksb.sb_lvbptr = buf + sizeof(struct val_blk);
+
+		flags |= LKF_VALBLK;
+	}
+
+	if (!with_lock_nl)
+		goto out;
+
+	/* because this is a new NL lock request */
+	flags |= LKF_EXPEDITE;
+
+	if (daemon_test)
+		goto out;
+
+	rv = dlm_ls_lock_wait(lmd->dh, LKM_NLMODE, &rdd->lksb, flags,
+			      r->name, strlen(r->name),
+			      0, NULL, NULL, NULL);
+	if (rv < 0) {
+		log_error("S %s R %s add_resource_dlm lock error %d", ls->name, r->name, rv);
+		if (buf) {
+			/* undo the allocation made above; the caller will not set lm_init */
+			free(buf);
+			rdd->vb = NULL;
+			rdd->lksb.sb_lvbptr = NULL;
+		}
+		return rv;
+	}
+ out:
+	return 0;
+}
+
+/*
+ * Drop the dlm lock of a resource (if one was acquired) and reset its
+ * dlm data so the resource can be initialised again later.
+ *
+ * Returns 0, or the negative dlm_ls_unlock_wait() result; the local
+ * state is cleared in either case.
+ */
+int lm_rem_resource_dlm(struct lockspace *ls, struct resource *r)
+{
+	struct lm_dlm *lmd = (struct lm_dlm *)ls->lm_data;
+	struct rd_dlm *rdd = (struct rd_dlm *)r->lm_data;
+	struct dlm_lksb *lksb = &rdd->lksb;
+	int rv = 0;
+
+	/* only talk to dlm when running for real and a lock id exists */
+	if (!daemon_test && lksb->sb_lkid) {
+		rv = dlm_ls_unlock_wait(lmd->dh, lksb->sb_lkid, 0, lksb);
+		if (rv < 0)
+			log_error("S %s R %s rem_resource_dlm unlock error %d", ls->name, r->name, rv);
+	}
+
+	free(rdd->vb);		/* free(NULL) is a no-op */
+
+	memset(rdd, 0, sizeof(*rdd));
+	r->lm_init = 0;
+	return rv;
+}
+
+/*
+ * Translate an lvmlockd lock mode into the dlm lock mode:
+ * LD_LK_EX -> LKM_EXMODE, LD_LK_SH -> LKM_PRMODE, anything else -> -1.
+ */
+static int to_dlm_mode(int ld_mode)
+{
+	if (ld_mode == LD_LK_EX)
+		return LKM_EXMODE;
+
+	if (ld_mode == LD_LK_SH)
+		return LKM_PRMODE;
+
+	return -1;
+}
+
+/*
+ * Adopt an orphan dlm lock on resource r in mode ld_mode.
+ *
+ * *r_version is always set to 0.  Returns 0 on success, -EINVAL for an
+ * unsupported mode, -EUCLEAN when an orphan exists but in another mode
+ * (the caller may try the other mode), or another negative value from
+ * dlm.  On failure the resource's dlm data is released again.
+ */
+static int lm_adopt_dlm(struct lockspace *ls, struct resource *r, int ld_mode,
+			uint32_t *r_version)
+{
+	struct lm_dlm *lmd = (struct lm_dlm *)ls->lm_data;
+	struct rd_dlm *rdd = (struct rd_dlm *)r->lm_data;
+	struct dlm_lksb *lksb;
+	uint32_t flags = 0;
+	int mode;
+	int rv;
+
+	*r_version = 0;
+
+	if (!r->lm_init) {
+		rv = lm_add_resource_dlm(ls, r, 0);
+		if (rv < 0)
+			return rv;
+		r->lm_init = 1;
+	}
+
+	lksb = &rdd->lksb;
+
+	flags |= LKF_PERSISTENT;
+	flags |= LKF_ORPHAN;
+
+	if (rdd->vb)
+		flags |= LKF_VALBLK;
+
+	mode = to_dlm_mode(ld_mode);
+	if (mode < 0) {
+		log_error("adopt_dlm invalid mode %d", ld_mode);
+		rv = -EINVAL;
+		goto fail;
+	}
+
+	log_debug("S %s R %s adopt_dlm", ls->name, r->name);
+
+	if (daemon_test)
+		return 0;
+
+	/*
+	 * dlm reports EAGAIN if an orphan is found with another mode,
+	 * and ENOENT if there is no orphan.
+	 *
+	 * cast/bast/param are (void *)1 because the kernel
+	 * returns errors if some are null.
+	 */
+
+	rv = dlm_ls_lockx(lmd->dh, mode, lksb, flags,
+			  r->name, strlen(r->name), 0,
+			  (void *)1, (void *)1, (void *)1,
+			  NULL, NULL);
+
+	/*
+	 * libdlm signals failure as -1 with the reason in errno, so the
+	 * errno form must be checked; the -EAGAIN form is kept in case a
+	 * library version returns the negative error directly.
+	 */
+	if (rv == -EAGAIN || (rv == -1 && errno == EAGAIN)) {
+		log_debug("S %s R %s adopt_dlm adopt mode %d try other mode",
+			  ls->name, r->name, ld_mode);
+		rv = -EUCLEAN;
+		goto fail;
+	}
+	if (rv < 0) {
+		log_debug("S %s R %s adopt_dlm mode %d flags %x error %d errno %d",
+			  ls->name, r->name, mode, flags, rv, errno);
+		goto fail;
+	}
+
+	/*
+	 * FIXME: For GL/VG locks we probably want to read the lvb,
+	 * especially if adopting an ex lock, because when we
+	 * release this adopted ex lock we may want to write new
+	 * lvb values based on the current lvb values (at least
+	 * in the GL case where we increment the current values.)
+	 *
+	 * It should be possible to read the lvb by requesting
+	 * this lock in the same mode it's already in.
+	 */
+
+	return rv;
+
+ fail:
+	lm_rem_resource_dlm(ls, r);
+	return rv;
+}
+
+/*
+ * Use PERSISTENT so that if lvmlockd exits while holding locks,
+ * the locks will remain orphaned in the dlm, still protecting what
+ * they were acquired to protect.
+ */
+
+/*
+ * Acquire the dlm lock for resource r in mode ld_mode.
+ *
+ * Unless adopting, the resource first gets a NL lock (on first use) which
+ * is then converted to the requested mode without queueing.  For GL/VG
+ * resources the value block is read back and *r_version is set from it;
+ * *r_version is set to 0 when the value block is invalid or has a newer
+ * major version than VAL_BLK_VERSION.
+ *
+ * Returns 0 on success, -EINVAL for an unsupported mode, -EAGAIN when the
+ * lock is held in a conflicting mode, or another negative value on error.
+ */
+int lm_lock_dlm(struct lockspace *ls, struct resource *r, int ld_mode,
+		uint32_t *r_version, int adopt)
+{
+	struct lm_dlm *lmd = (struct lm_dlm *)ls->lm_data;
+	struct rd_dlm *rdd = (struct rd_dlm *)r->lm_data;
+	struct dlm_lksb *lksb;
+	struct val_blk vb;
+	uint32_t flags = 0;
+	uint16_t vb_version;
+	int mode;
+	int rv;
+
+	if (adopt) {
+		/* When adopting, we don't follow the normal method
+		   of acquiring a NL lock then converting it to the
+		   desired mode. */
+		return lm_adopt_dlm(ls, r, ld_mode, r_version);
+	}
+
+	if (!r->lm_init) {
+		rv = lm_add_resource_dlm(ls, r, 1);
+		if (rv < 0)
+			return rv;
+		r->lm_init = 1;
+	}
+
+	lksb = &rdd->lksb;
+
+	flags |= LKF_CONVERT;
+	flags |= LKF_NOQUEUE;
+	flags |= LKF_PERSISTENT;
+
+	if (rdd->vb)
+		flags |= LKF_VALBLK;
+
+	mode = to_dlm_mode(ld_mode);
+	if (mode < 0) {
+		log_error("lock_dlm invalid mode %d", ld_mode);
+		return -EINVAL;
+	}
+
+	log_debug("S %s R %s lock_dlm", ls->name, r->name);
+
+	if (daemon_test) {
+		*r_version = 0;
+		return 0;
+	}
+
+	rv = dlm_ls_lock_wait(lmd->dh, mode, lksb, flags,
+			      r->name, strlen(r->name),
+			      0, NULL, NULL, NULL);
+	/*
+	 * libdlm signals failure as -1 with the reason in errno, so a lock
+	 * conflict has to be recognised through errno to be reported to the
+	 * caller as -EAGAIN; the -EAGAIN form is kept for compatibility.
+	 */
+	if (rv == -EAGAIN || (rv == -1 && errno == EAGAIN)) {
+		log_error("S %s R %s lock_dlm mode %d rv EAGAIN", ls->name, r->name, mode);
+		return -EAGAIN;
+	}
+	if (rv < 0) {
+		log_error("S %s R %s lock_dlm error %d", ls->name, r->name, rv);
+		return rv;
+	}
+
+	if (rdd->vb) {
+		if (lksb->sb_flags & DLM_SBF_VALNOTVALID) {
+			log_debug("S %s R %s lock_dlm VALNOTVALID", ls->name, r->name);
+			memset(rdd->vb, 0, sizeof(struct val_blk));
+			*r_version = 0;
+			goto out;
+		}
+
+		memcpy(&vb, lksb->sb_lvbptr, sizeof(struct val_blk));
+		vb_version = le16_to_cpu(vb.version);
+
+		/* only the major byte of the version decides compatibility */
+		if (vb_version && ((vb_version & 0xFF00) > (VAL_BLK_VERSION & 0xFF00))) {
+			log_error("S %s R %s lock_dlm ignore vb_version %x",
+				  ls->name, r->name, vb_version);
+			*r_version = 0;
+			/* sb_lvbptr points into the same allocation as rdd->vb */
+			free(rdd->vb);
+			rdd->vb = NULL;
+			lksb->sb_lvbptr = NULL;
+			goto out;
+		}
+
+		*r_version = le32_to_cpu(vb.r_version);
+		memcpy(rdd->vb, &vb, sizeof(vb)); /* rdd->vb saved as le */
+
+		log_debug("S %s R %s lock_dlm get r_version %u",
+			  ls->name, r->name, *r_version);
+	}
+out:
+	return 0;
+}
+
+/*
+ * Convert the dlm lock held on resource r to mode ld_mode.
+ *
+ * When the lock is currently held EX and r_version is non-zero, the new
+ * version is written to the value block as part of the conversion.
+ *
+ * Returns 0 on success, -EINVAL for an unsupported mode, -EAGAIN when the
+ * conversion conflicts with another holder, or another negative value on
+ * error.
+ */
+int lm_convert_dlm(struct lockspace *ls, struct resource *r,
+		   int ld_mode, uint32_t r_version)
+{
+	struct lm_dlm *lmd = (struct lm_dlm *)ls->lm_data;
+	struct rd_dlm *rdd = (struct rd_dlm *)r->lm_data;
+	struct dlm_lksb *lksb = &rdd->lksb;
+	int mode;
+	uint32_t flags = 0;
+	int rv;
+
+	log_debug("S %s R %s convert_dlm", ls->name, r->name);
+
+	/* validate the mode before touching the value block */
+	mode = to_dlm_mode(ld_mode);
+	if (mode < 0) {
+		log_error("S %s R %s convert_dlm invalid mode %d", ls->name, r->name, ld_mode);
+		return -EINVAL;
+	}
+
+	flags |= LKF_CONVERT;
+	flags |= LKF_NOQUEUE;
+	flags |= LKF_PERSISTENT;
+
+	if (rdd->vb && r_version && (r->mode == LD_LK_EX)) {
+		if (!rdd->vb->version) {
+			/* first time vb has been written */
+			rdd->vb->version = cpu_to_le16(VAL_BLK_VERSION);
+		}
+		rdd->vb->r_version = cpu_to_le32(r_version);
+		memcpy(lksb->sb_lvbptr, rdd->vb, sizeof(struct val_blk));
+
+		log_debug("S %s R %s convert_dlm set r_version %u",
+			  ls->name, r->name, r_version);
+
+		flags |= LKF_VALBLK;
+	}
+
+	if (daemon_test)
+		return 0;
+
+	rv = dlm_ls_lock_wait(lmd->dh, mode, lksb, flags,
+			      r->name, strlen(r->name),
+			      0, NULL, NULL, NULL);
+	/* libdlm signals failure as -1 with the reason in errno */
+	if (rv == -EAGAIN || (rv == -1 && errno == EAGAIN)) {
+		/* FIXME: When does this happen?  Should something different be done? */
+		log_error("S %s R %s convert_dlm mode %d rv EAGAIN", ls->name, r->name, mode);
+		return -EAGAIN;
+	}
+	if (rv < 0) {
+		log_error("S %s R %s convert_dlm error %d", ls->name, r->name, rv);
+	}
+	return rv;
+}
+
+/*
+ * Release the lock on resource r by converting it down to NL mode.
+ *
+ * When the lock is held EX and r_version is non-zero, the new version is
+ * written to the value block with the conversion.  lmuf_flags is only
+ * logged here.  Returns the dlm_ls_lock_wait() result (0 in daemon_test
+ * mode).
+ */
+int lm_unlock_dlm(struct lockspace *ls, struct resource *r,
+		  uint32_t r_version, uint32_t lmuf_flags)
+{
+	struct lm_dlm *lmd = (struct lm_dlm *)ls->lm_data;
+	struct rd_dlm *rdd = (struct rd_dlm *)r->lm_data;
+	struct dlm_lksb *lksb = &rdd->lksb;
+	/*
+	 * Do not set PERSISTENT, because we don't need an orphan
+	 * NL lock to protect anything.
+	 */
+	uint32_t flags = LKF_CONVERT;
+	int write_vb = rdd->vb && r_version && (r->mode == LD_LK_EX);
+	int rv;
+
+	log_debug("S %s R %s unlock_dlm r_version %u flags %x",
+		  ls->name, r->name, r_version, lmuf_flags);
+
+	if (write_vb) {
+		/* first time vb has been written */
+		if (!rdd->vb->version)
+			rdd->vb->version = cpu_to_le16(VAL_BLK_VERSION);
+
+		rdd->vb->r_version = cpu_to_le32(r_version);
+		memcpy(lksb->sb_lvbptr, rdd->vb, sizeof(struct val_blk));
+
+		log_debug("S %s R %s unlock_dlm set r_version %u",
+			  ls->name, r->name, r_version);
+
+		flags |= LKF_VALBLK;
+	}
+
+	if (daemon_test)
+		return 0;
+
+	rv = dlm_ls_lock_wait(lmd->dh, LKM_NLMODE, lksb, flags,
+			      r->name, strlen(r->name),
+			      0, NULL, NULL, NULL);
+	if (rv < 0)
+		log_error("S %s R %s unlock_dlm error %d", ls->name, r->name, rv);
+
+	return rv;
+}
+
+/*
+ * This list could be read from dlm_controld via libdlmcontrol,
+ * but it's simpler to get it from sysfs.
+ */
+
+#define DLM_LOCKSPACES_PATH "/sys/kernel/config/dlm/cluster/spaces"
+
+/*
+ * Collect the existing lvm dlm lockspaces found under DLM_LOCKSPACES_PATH.
+ *
+ * For every directory entry whose name starts with LVM_LS_PREFIX a new
+ * struct lockspace is allocated, filled with the lockspace and VG names
+ * and appended to ls_rejoin.
+ *
+ * Returns 0 on success, -ECONNREFUSED when the directory cannot be opened
+ * and -ENOMEM when a lockspace cannot be allocated (entries already added
+ * to ls_rejoin are left on the list).
+ */
+int lm_get_lockspaces_dlm(struct list_head *ls_rejoin)
+{
+	const size_t prefix_len = strlen(LVM_LS_PREFIX);
+	struct lockspace *ls;
+	struct dirent *de;
+	DIR *ls_dir;
+
+	if (!(ls_dir = opendir(DLM_LOCKSPACES_PATH)))
+		return -ECONNREFUSED;
+
+	while ((de = readdir(ls_dir))) {
+		if (de->d_name[0] == '.')
+			continue;
+
+		if (strncmp(de->d_name, LVM_LS_PREFIX, prefix_len))
+			continue;
+
+		if (!(ls = alloc_lockspace())) {
+			if (closedir(ls_dir))
+				log_error("lm_get_lockspace_dlm: closedir failed");
+			return -ENOMEM;
+		}
+
+		ls->lm_type = LD_LM_DLM;
+
+		/*
+		 * strncpy() does not terminate a truncated copy; terminate
+		 * explicitly rather than rely on alloc_lockspace() zeroing
+		 * the MAX_NAME+1 byte buffers.
+		 */
+		strncpy(ls->name, de->d_name, MAX_NAME);
+		ls->name[MAX_NAME] = '\0';
+		strncpy(ls->vg_name, ls->name + prefix_len, MAX_NAME);
+		ls->vg_name[MAX_NAME] = '\0';
+
+		list_add_tail(&ls->list, ls_rejoin);
+	}
+
+	if (closedir(ls_dir))
+		log_error("lm_get_lockspace_dlm: closedir failed");
+	return 0;
+}
+
+/*
+ * Report whether dlm appears to be running: 1 when the cluster name can
+ * be read, 0 otherwise.
+ */
+int lm_is_running_dlm(void)
+{
+	char sys_clustername[MAX_ARGS+1] = { 0 };
+
+	return (read_cluster_name(sys_clustername) < 0) ? 0 : 1;
+}
--- a/daemons/lvmlockd/lvmlockd-internal.h
+++ b/daemons/lvmlockd/lvmlockd-internal.h
@@ -0,0 +1,577 @@
+/*
+ * Copyright (C) 2014-2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ */
+
+#ifndef _LVM_LVMLOCKD_INTERNAL_H
+#define _LVM_LVMLOCKD_INTERNAL_H
+
+#define MAX_NAME 64
+#define MAX_ARGS 64
+
+#define R_NAME_GL_DISABLED "_GLLK_disabled"
+#define R_NAME_GL          "GLLK"
+#define R_NAME_VG          "VGLK"
+#define S_NAME_GL_DLM      "lvm_global"
+#define LVM_LS_PREFIX      "lvm_"           /* ls name is prefix + vg_name */
+/* global lockspace name for sanlock is a vg name */
+
+/* lock manager types (stored in the lm_type fields of action and lockspace) */
+enum {
+	LD_LM_NONE = 0,
+	LD_LM_UNUSED = 1, /* place holder so values match lib/locking/lvmlockd.h */
+	LD_LM_DLM = 2,
+	LD_LM_SANLOCK = 3,
+};
+
+/* operation types (stored in action.op); values count up from 1 */
+enum {
+	LD_OP_HELLO = 1,
+	LD_OP_QUIT,
+	LD_OP_INIT,
+	LD_OP_FREE,
+	LD_OP_START,
+	LD_OP_STOP,
+	LD_OP_LOCK,
+	LD_OP_UPDATE,
+	LD_OP_CLOSE,
+	LD_OP_ENABLE,
+	LD_OP_DISABLE,
+	LD_OP_START_WAIT,
+	LD_OP_STOP_ALL,
+	LD_OP_DUMP_INFO,
+	LD_OP_DUMP_LOG,
+	LD_OP_RENAME_BEFORE,
+	LD_OP_RENAME_FINAL,
+	LD_OP_RUNNING_LM,
+	LD_OP_FIND_FREE_LOCK,
+	LD_OP_FORGET_VG_NAME,
+	LD_OP_KILL_VG,
+	LD_OP_DROP_VG,
+};
+
+/* resource types (stored in action.rt and resource.type) */
+enum {
+	LD_RT_GL = 1,
+	LD_RT_VG,
+	LD_RT_LV,
+};
+
+/*
+ * lock modes, more restrictive must be larger value
+ *
+ * The dlm backend maps LD_LK_SH to LKM_PRMODE and LD_LK_EX to LKM_EXMODE.
+ */
+enum {
+	LD_LK_IV = -1,
+	LD_LK_UN = 0,
+	LD_LK_NL = 1,
+	LD_LK_SH = 2,
+	LD_LK_EX = 3,
+};
+
+/*
+ * Node of a circular doubly linked list.  It is embedded in the structures
+ * that are kept on lists; list_entry() recovers the enclosing structure.
+ */
+struct list_head {
+	struct list_head *next, *prev;
+};
+
+/*
+ * State kept per client.
+ * NOTE(review): field meanings below are inferred from names only;
+ * confirm against lvmlockd-core.c.
+ */
+struct client {
+	struct list_head list;
+	pthread_mutex_t mutex;
+	int pid;
+	int fd;
+	int pi;				/* presumably the poll array index - confirm */
+	uint32_t id;
+	unsigned int recv : 1;
+	unsigned int dead : 1;
+	unsigned int poll_ignore : 1;
+	char name[MAX_NAME+1];
+};
+
+/* Action flags, bit values for action.flags (LD_AF_). */
+#define LD_AF_PERSISTENT           0x00000001
+#define LD_AF_UNUSED               0x00000002 /* use me */
+#define LD_AF_UNLOCK_CANCEL        0x00000004
+#define LD_AF_NEXT_VERSION         0x00000008
+#define LD_AF_WAIT                 0x00000010
+#define LD_AF_FORCE                0x00000020
+#define LD_AF_EX_DISABLE           0x00000040
+#define LD_AF_ENABLE               0x00000080
+#define LD_AF_DISABLE              0x00000100
+#define LD_AF_SEARCH_LS            0x00000200
+#define LD_AF_WAIT_STARTING        0x00001000
+#define LD_AF_DUP_GL_LS            0x00002000
+#define LD_AF_INACTIVE_LS          0x00004000
+#define LD_AF_ADD_LS_ERROR         0x00008000
+#define LD_AF_ADOPT                0x00010000
+#define LD_AF_WARN_GL_REMOVED	   0x00020000
+
+/*
+ * Number of times to repeat a lock request after
+ * a lock conflict (-EAGAIN) if unspecified in the
+ * request.
+ */
+#define DEFAULT_MAX_RETRIES 4
+
+/*
+ * One requested operation.  The "list" member lets an action be queued
+ * on lockspace.actions or resource.actions.
+ */
+struct action {
+	struct list_head list;
+	uint32_t client_id;
+	uint32_t flags;			/* LD_AF_ */
+	uint32_t version;
+	uint64_t host_id;
+	int8_t op;			/* operation type LD_OP_ */
+	int8_t rt;			/* resource type LD_RT_ */
+	int8_t mode;			/* lock mode LD_LK_ */
+	int8_t lm_type;			/* lock manager: LD_LM_DLM, LD_LM_SANLOCK */
+	int retries;
+	int max_retries;		/* see DEFAULT_MAX_RETRIES */
+	int result;
+	int lm_rv;			/* return value from lm_ function */
+	char vg_uuid[64];
+	char vg_name[MAX_NAME+1];
+	char lv_name[MAX_NAME+1];
+	char lv_uuid[MAX_NAME+1];
+	char vg_args[MAX_ARGS+1];
+	char lv_args[MAX_ARGS+1];
+	char vg_sysid[MAX_NAME+1];
+};
+
+/*
+ * A lockable resource (gl/vg/lv) within a lockspace.  The structure is
+ * followed by lock manager specific data (lm_data); for dlm its size is
+ * given by lm_data_size_dlm().
+ */
+struct resource {
+	struct list_head list;		/* lockspace.resources */
+	char name[MAX_NAME+1];		/* vg name or lv name */
+	int8_t type;			/* resource type LD_RT_ */
+	int8_t mode;			/* lock mode LD_LK_ */
+	unsigned int sh_count;		/* number of sh locks on locks list */
+	uint32_t version;
+	unsigned int lm_init : 1;	/* lm_data is initialized */
+	unsigned int adopt : 1;		/* temp flag in remove_inactive_lvs */
+	unsigned int version_zero_valid : 1;
+	struct list_head locks;
+	struct list_head actions;
+	struct val_blk *vb;
+	char lv_args[MAX_ARGS+1];
+	char lm_data[0];		/* lock manager specific data */
+};
+
+#define LD_LF_PERSISTENT 0x00000001
+
+/* One lock held on a resource; kept on resource.locks. */
+struct lock {
+	struct list_head list;		/* resource.locks */
+	int8_t mode;			/* lock mode LD_LK_ */
+	uint32_t version;
+	uint32_t flags;			/* LD_LF_ */
+	uint32_t client_id; /* may be 0 for persistent or internal locks */
+};
+
+/*
+ * A lockspace.  For dlm, lockspaces found in sysfs are named
+ * LVM_LS_PREFIX + vg_name.
+ */
+struct lockspace {
+	struct list_head list;		/* lockspaces */
+	char name[MAX_NAME+1];
+	char vg_name[MAX_NAME+1];
+	char vg_uuid[64];
+	char vg_args[MAX_ARGS+1];	/* lock manager specific args */
+	char vg_sysid[MAX_NAME+1];
+	int8_t lm_type;			/* lock manager: LD_LM_DLM, LD_LM_SANLOCK */
+	void *lm_data;			/* lock manager specific data (struct lm_dlm for dlm) */
+	uint64_t host_id;
+	uint64_t free_lock_offset;	/* start search for free lock here */
+
+	uint32_t start_client_id;	/* client_id that started the lockspace */
+	pthread_t thread;		/* makes synchronous lock requests */
+	pthread_cond_t cond;
+	pthread_mutex_t mutex;
+	unsigned int create_fail : 1;
+	unsigned int create_done : 1;
+	unsigned int thread_work : 1;
+	unsigned int thread_stop : 1;
+	unsigned int thread_done : 1;
+	unsigned int sanlock_gl_enabled: 1;
+	unsigned int sanlock_gl_dup: 1;
+	unsigned int free_vg: 1;
+	unsigned int kill_vg: 1;
+	unsigned int drop_vg: 1;
+
+	struct list_head actions;	/* new client actions */
+	struct list_head resources;	/* resource/lock state for gl/vg/lv */
+};
+
+/*
+ * Value block stored in a lock's lvb.  The dlm backend keeps the fields in
+ * little-endian byte order and treats a block whose version has a larger
+ * major byte (0xFF00) than VAL_BLK_VERSION as unusable.
+ */
+#define VAL_BLK_VERSION 0x0101
+
+struct val_blk {
+	uint16_t version;	/* VAL_BLK_VERSION once written, 0 before the first write */
+	uint16_t flags;
+	uint32_t r_version;	/* resource version */
+};
+
+/* lm_unlock flags */
+#define LMUF_FREE_VG 0x00000001
+
+/*
+ * Given a pointer to "member" inside a "type", return a pointer to the
+ * enclosing "type".  Uses the GNU statement-expression and typeof
+ * extensions.
+ */
+#define container_of(ptr, type, member) ({                      \
+	const typeof( ((type *)0)->member ) *__mptr = (ptr);    \
+	(type *)( (char *)__mptr - offsetof(type,member) );})
+
+/* Make list an empty list: both links point back at the head itself. */
+static inline void INIT_LIST_HEAD(struct list_head *list)
+{
+	list->next = list;
+	list->prev = list;
+}
+
+/* Link new between the two adjacent nodes prev and next. */
+static inline void __list_add(struct list_head *new,
+                              struct list_head *prev,
+                              struct list_head *next)
+{
+	next->prev = new;
+	new->next = next;
+	new->prev = prev;
+	prev->next = new;
+}
+
+/* Join prev and next, dropping whatever was linked between them. */
+static inline void __list_del(struct list_head *prev, struct list_head *next)
+{
+	next->prev = prev;
+	prev->next = next;
+}
+
+/* Insert new directly after head, i.e. at the front of the list. */
+static inline void list_add(struct list_head *new, struct list_head *head)
+{
+	__list_add(new, head, head->next);
+}
+
+/* Insert new directly before head, i.e. at the end of the list. */
+static inline void list_add_tail(struct list_head *new, struct list_head *head)
+{
+	__list_add(new, head->prev, head);
+}
+
+/* Unlink entry from its list; entry's own next/prev are left unchanged. */
+static inline void list_del(struct list_head *entry)
+{
+	__list_del(entry->prev, entry->next);
+}
+
+/* Return non-zero when the list headed by head has no entries. */
+static inline int list_empty(const struct list_head *head)
+{
+	return head->next == head;
+}
+
+/* Structure of type "type" that embeds the list node ptr as "member". */
+#define list_entry(ptr, type, member) \
+	container_of(ptr, type, member)
+
+/* First entry of the list headed by ptr; the list must not be empty. */
+#define list_first_entry(ptr, type, member) \
+	list_entry((ptr)->next, type, member)
+
+/* Iterate pos over all entries; the current entry must not be removed. */
+#define list_for_each_entry(pos, head, member)                          \
+	for (pos = list_entry((head)->next, typeof(*pos), member);      \
+	     &pos->member != (head);    \
+	     pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/*
+ * Like list_for_each_entry, but the next entry is saved in n before the
+ * body runs, so the body may remove pos from the list.
+ */
+#define list_for_each_entry_safe(pos, n, head, member)                  \
+	for (pos = list_entry((head)->next, typeof(*pos), member),      \
+	     n = list_entry(pos->member.next, typeof(*pos), member); \
+	     &pos->member != (head);                                    \
+	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+
+/* to improve readability */
+#define WAIT     1
+#define NO_WAIT  0
+#define FORCE    1
+#define NO_FORCE 0
+
+/*
+ * global variables
+ */
+
+/*
+ * The globals below are declared with EXTERN.  A source file that defines
+ * EXTERN before including this header gets the definitions (EXTERN expands
+ * to nothing and INIT(X) to "=X"); every other file gets plain "extern"
+ * declarations with INIT(X) expanding to nothing.
+ */
+#ifndef EXTERN
+#define EXTERN extern
+#define INIT(X)
+#else
+#undef EXTERN
+#define EXTERN
+#define INIT(X) =X
+#endif
+
+/*
+ * gl_type_static and gl_use_ are set by command line or config file
+ * to specify whether the global lock comes from dlm or sanlock.
+ * Without a static setting, lvmlockd will figure out where the
+ * global lock should be (but it could get mixed up in cases where
+ * both sanlock and dlm vgs exist.)
+ *
+ * gl_use_dlm means that the gl should come from lockspace gl_lsname_dlm
+ * gl_use_sanlock means that the gl should come from lockspace gl_lsname_sanlock
+ *
+ * gl_use_dlm has precedence over gl_use_sanlock, so if a node sees both
+ * dlm and sanlock vgs, it will use the dlm gl.
+ *
+ * gl_use_ is set when the first evidence of that lm_type is seen
+ * in any command.
+ *
+ * gl_lsname_sanlock is set when the first vg is seen in which an
+ * enabled gl exists, or when init_vg creates a vg with gl enabled,
+ * or when enable_gl is used.
+ *
+ * gl_lsname_sanlock is cleared when free_vg deletes a vg with gl enabled
+ * or when disable_gl matches.
+ */
+
+EXTERN int gl_running_dlm;
+EXTERN int gl_type_static;
+EXTERN int gl_use_dlm;
+EXTERN int gl_use_sanlock;
+EXTERN pthread_mutex_t gl_type_mutex;
+
+EXTERN char gl_lsname_dlm[MAX_NAME+1];
+EXTERN char gl_lsname_sanlock[MAX_NAME+1];
+
+EXTERN int daemon_test; /* run as much as possible without a live lock manager */
+EXTERN int daemon_debug;
+EXTERN int daemon_host_id;
+EXTERN const char *daemon_host_id_file;
+EXTERN int sanlock_io_timeout;
+
+/*
+ * This flag is set to 1 if we see multiple vgs with the global
+ * lock enabled.  While this is set, we return a special flag
+ * with the vg lock result indicating to the lvm command that
+ * there is a duplicate gl in the vg which should be resolved.
+ * While this is set, find_lockspace_name has the side job of
+ * counting the number of lockspaces with enabled gl's so that
+ * this can be set back to zero when the duplicates are disabled.
+ */
+EXTERN int sanlock_gl_dup;
+
+void log_level(int level, const char *fmt, ...)  __attribute__((format(printf, 2, 3)));
+#define log_debug(fmt, args...) log_level(LOG_DEBUG, fmt, ##args)
+#define log_error(fmt, args...) log_level(LOG_ERR, fmt, ##args)
+#define log_warn(fmt, args...) log_level(LOG_WARNING, fmt, ##args)
+
+struct lockspace *alloc_lockspace(void);
+int lockspaces_empty(void);
+int last_string_from_args(char *args_in, char *last);
+int version_from_args(char *args, unsigned int *major, unsigned int *minor, unsigned int *patch);
+
+
+#ifdef LOCKDDLM_SUPPORT
+
+int lm_init_vg_dlm(char *ls_name, char *vg_name, uint32_t flags, char *vg_args);
+int lm_prepare_lockspace_dlm(struct lockspace *ls);
+int lm_add_lockspace_dlm(struct lockspace *ls, int adopt);
+int lm_rem_lockspace_dlm(struct lockspace *ls, int free_vg);
+int lm_lock_dlm(struct lockspace *ls, struct resource *r, int ld_mode,
+		uint32_t *r_version, int adopt);
+int lm_convert_dlm(struct lockspace *ls, struct resource *r,
+		   int ld_mode, uint32_t r_version);
+int lm_unlock_dlm(struct lockspace *ls, struct resource *r,
+		  uint32_t r_version, uint32_t lmu_flags);
+int lm_rem_resource_dlm(struct lockspace *ls, struct resource *r);
+int lm_get_lockspaces_dlm(struct list_head *ls_rejoin);
+int lm_data_size_dlm(void);
+int lm_is_running_dlm(void);
+
+/* Returns 1: this build has dlm support (LOCKDDLM_SUPPORT is defined). */
+static inline int lm_support_dlm(void)
+{
+	return 1;
+}
+
+#else
+
+/*
+ * Stubs used when LOCKDDLM_SUPPORT is not defined: every dlm operation
+ * fails with -1, while lm_is_running_dlm() and lm_support_dlm() return 0.
+ */
+static inline int lm_init_vg_dlm(char *ls_name, char *vg_name, uint32_t flags, char *vg_args)
+{
+	return -1;
+}
+
+static inline int lm_prepare_lockspace_dlm(struct lockspace *ls)
+{
+	return -1;
+}
+
+static inline int lm_add_lockspace_dlm(struct lockspace *ls, int adopt)
+{
+	return -1;
+}
+
+static inline int lm_rem_lockspace_dlm(struct lockspace *ls, int free_vg)
+{
+	return -1;
+}
+
+static inline int lm_lock_dlm(struct lockspace *ls, struct resource *r, int ld_mode,
+		uint32_t *r_version, int adopt)
+{
+	return -1;
+}
+
+static inline int lm_convert_dlm(struct lockspace *ls, struct resource *r,
+		   int ld_mode, uint32_t r_version)
+{
+	return -1;
+}
+
+static inline int lm_unlock_dlm(struct lockspace *ls, struct resource *r,
+		  uint32_t r_version, uint32_t lmu_flags)
+{
+	return -1;
+}
+
+static inline int lm_rem_resource_dlm(struct lockspace *ls, struct resource *r)
+{
+	return -1;
+}
+
+static inline int lm_get_lockspaces_dlm(struct list_head *ls_rejoin)
+{
+	return -1;
+}
+
+static inline int lm_data_size_dlm(void)
+{
+	return -1;
+}
+
+/* dlm can never be running in a build without dlm support */
+static inline int lm_is_running_dlm(void)
+{
+	return 0;
+}
+
+/* Returns 0: this build has no dlm support. */
+static inline int lm_support_dlm(void)
+{
+	return 0;
+}
+
+#endif /* dlm support */
+
+#ifdef LOCKDSANLOCK_SUPPORT
+
+int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args);
+int lm_init_lv_sanlock(char *ls_name, char *vg_name, char *lv_name, char *vg_args, char *lv_args, uint64_t free_offset);
+int lm_free_lv_sanlock(struct lockspace *ls, struct resource *r);
+int lm_rename_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args);
+int lm_prepare_lockspace_sanlock(struct lockspace *ls);
+int lm_add_lockspace_sanlock(struct lockspace *ls, int adopt);
+int lm_rem_lockspace_sanlock(struct lockspace *ls, int free_vg);
+int lm_lock_sanlock(struct lockspace *ls, struct resource *r, int ld_mode,
+		    uint32_t *r_version, int *retry, int adopt);
+int lm_convert_sanlock(struct lockspace *ls, struct resource *r,
+		       int ld_mode, uint32_t r_version);
+int lm_unlock_sanlock(struct lockspace *ls, struct resource *r,
+		      uint32_t r_version, uint32_t lmu_flags);
+int lm_able_gl_sanlock(struct lockspace *ls, int enable);
+int lm_ex_disable_gl_sanlock(struct lockspace *ls);
+int lm_hosts_sanlock(struct lockspace *ls, int notify);
+int lm_rem_resource_sanlock(struct lockspace *ls, struct resource *r);
+int lm_gl_is_enabled(struct lockspace *ls);
+int lm_get_lockspaces_sanlock(struct list_head *ls_rejoin);
+int lm_data_size_sanlock(void);
+int lm_is_running_sanlock(void);
+int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t *free_offset);
+
+/* Returns 1: this build has sanlock support (LOCKDSANLOCK_SUPPORT is defined). */
+static inline int lm_support_sanlock(void)
+{
+	return 1;
+}
+
+#else
+
+/*
+ * Stubs used when LOCKDSANLOCK_SUPPORT is not defined: every sanlock
+ * operation fails with -1, while lm_is_running_sanlock() and
+ * lm_support_sanlock() return 0.
+ */
+static inline int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args)
+{
+	return -1;
+}
+
+static inline int lm_init_lv_sanlock(char *ls_name, char *vg_name, char *lv_name, char *vg_args, char *lv_args, uint64_t free_offset)
+{
+	return -1;
+}
+
+static inline int lm_free_lv_sanlock(struct lockspace *ls, struct resource *r)
+{
+	return -1;
+}
+
+static inline int lm_rename_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args)
+{
+	return -1;
+}
+
+static inline int lm_prepare_lockspace_sanlock(struct lockspace *ls)
+{
+	return -1;
+}
+
+static inline int lm_add_lockspace_sanlock(struct lockspace *ls, int adopt)
+{
+	return -1;
+}
+
+static inline int lm_rem_lockspace_sanlock(struct lockspace *ls, int free_vg)
+{
+	return -1;
+}
+
+static inline int lm_lock_sanlock(struct lockspace *ls, struct resource *r, int ld_mode,
+		    uint32_t *r_version, int *retry, int adopt)
+{
+	return -1;
+}
+
+static inline int lm_convert_sanlock(struct lockspace *ls, struct resource *r,
+		       int ld_mode, uint32_t r_version)
+{
+	return -1;
+}
+
+static inline int lm_unlock_sanlock(struct lockspace *ls, struct resource *r,
+		      uint32_t r_version, uint32_t lmu_flags)
+{
+	return -1;
+}
+
+static inline int lm_able_gl_sanlock(struct lockspace *ls, int enable)
+{
+	return -1;
+}
+
+static inline int lm_ex_disable_gl_sanlock(struct lockspace *ls)
+{
+	return -1;
+}
+
+static inline int lm_hosts_sanlock(struct lockspace *ls, int notify)
+{
+	return -1;
+}
+
+static inline int lm_rem_resource_sanlock(struct lockspace *ls, struct resource *r)
+{
+	return -1;
+}
+
+static inline int lm_gl_is_enabled(struct lockspace *ls)
+{
+	return -1;
+}
+
+static inline int lm_get_lockspaces_sanlock(struct list_head *ls_rejoin)
+{
+	return -1;
+}
+
+static inline int lm_data_size_sanlock(void)
+{
+	return -1;
+}
+
+/* sanlock can never be running in a build without sanlock support */
+static inline int lm_is_running_sanlock(void)
+{
+	return 0;
+}
+
+static inline int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t *free_offset)
+{
+	return -1;
+}
+
+/* Returns 0: this build has no sanlock support. */
+static inline int lm_support_sanlock(void)
+{
+	return 0;
+}
+
+#endif /* sanlock support */
+
+#endif	/* _LVM_LVMLOCKD_INTERNAL_H */
--- a/daemons/lvmlockd/lvmlockd-sanlock.c
+++ b/daemons/lvmlockd/lvmlockd-sanlock.c
--- a/daemons/lvmpolld/.gitignore
+++ b/daemons/lvmpolld/.gitignore
@@ -0,0 +1 @@
+lvmpolld
--- a/daemons/lvmpolld/Makefile.in
+++ b/daemons/lvmpolld/Makefile.in
@@ -0,0 +1,48 @@
+#
+# Copyright (C) 2014-2015 Red Hat, Inc.
+#
+# This file is part of LVM2.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU Lesser General Public License v.2.1.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+top_builddir = @top_builddir@
+
+SOURCES = lvmpolld-core.c lvmpolld-data-utils.c lvmpolld-cmd-utils.c
+
+TARGETS = lvmpolld
+
+.PHONY: install_lvmpolld
+
+CFLOW_LIST = $(SOURCES)
+CFLOW_LIST_TARGET = $(LIB_NAME).cflow
+CFLOW_TARGET = lvmpolld
+
+include $(top_builddir)/make.tmpl
+
+INCLUDES += -I$(top_srcdir)/libdaemon/server
+LVMLIBS = -ldaemonserver $(LVMINTERNAL_LIBS) -ldevmapper
+
+LIBS += $(PTHREAD_LIBS)
+
+LDFLAGS += -L$(top_builddir)/libdaemon/server $(DAEMON_LDFLAGS)
+CLDFLAGS += -L$(top_builddir)/libdaemon/server
+CFLAGS += $(DAEMON_CFLAGS)
+
+lvmpolld: $(OBJECTS) $(top_builddir)/libdaemon/client/libdaemonclient.a \
+		    $(top_builddir)/libdaemon/server/libdaemonserver.a
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJECTS) $(LVMLIBS) $(LIBS)
+
+install_lvmpolld: lvmpolld
+	$(INSTALL_PROGRAM) -D $< $(sbindir)/$(<F)
+
+install_lvm2: install_lvmpolld
+
+install: install_lvm2
--- a/daemons/lvmpolld/lvmpolld-cmd-utils.c
+++ b/daemons/lvmpolld/lvmpolld-cmd-utils.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright (C) 2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "lvmpolld-common.h"
+
+/* extract this info from autoconf/automake files */
+#define LVPOLL_CMD "lvpoll"
+
+#define MIN_ARGV_SIZE  8
+
+static const char *const polling_ops[] = { [PVMOVE] = LVMPD_REQ_PVMOVE,
+					  [CONVERT] = LVMPD_REQ_CONVERT,
+					  [MERGE] = LVMPD_REQ_MERGE,
+					  [MERGE_THIN] = LVMPD_REQ_MERGE_THIN }; /* indexed by enum poll_type */
+
+const char *polling_op(enum poll_type type)
+{
+	return type < POLL_TYPE_MAX ? polling_ops[type] : "<undefined>";
+}
+
+static int add_to_cmd_arr(const char ***cmdargv, const char *str, unsigned *ind)
+{
+	const char **grown;
+
+	/* array is full at each non-zero multiple of MIN_ARGV_SIZE: grow by one more chunk */
+	if (*ind && !(*ind % MIN_ARGV_SIZE)) {
+		grown = dm_realloc(*cmdargv, (*ind + MIN_ARGV_SIZE) * sizeof(char *));
+		if (!grown)
+			return 0; /* *cmdargv is left untouched on failure */
+		*cmdargv = grown;
+	}
+	(*cmdargv)[(*ind)++] = str; /* store str and advance the caller's index */
+
+	return 1;
+}
+
+const char **cmdargv_ctr(const struct lvmpolld_lv *pdlv, const char *lvm_binary, unsigned abort_polling, unsigned handle_missing_pvs)
+{
+	unsigned i = 0;
+	const char **cmd_argv = dm_malloc(MIN_ARGV_SIZE * sizeof(char *));
+
+	if (!cmd_argv)
+		return NULL;
+
+	/* path to lvm2 binary */
+	if (!add_to_cmd_arr(&cmd_argv, lvm_binary, &i))
+		goto err;
+
+	/* cmd to execute */
+	if (!add_to_cmd_arr(&cmd_argv, LVPOLL_CMD, &i))
+		goto err;
+
+	/* transfer internal polling interval */
+	if (pdlv->sinterval &&
+	    (!add_to_cmd_arr(&cmd_argv, "--interval", &i) ||
+	     !add_to_cmd_arr(&cmd_argv, pdlv->sinterval, &i)))
+		goto err;
+
+	/* pass abort param */
+	if (abort_polling &&
+	    !add_to_cmd_arr(&cmd_argv, "--abort", &i))
+		goto err;
+
+	/* pass handle-missing-pvs. used by mirror polling operation */
+	if (handle_missing_pvs &&
+	    !add_to_cmd_arr(&cmd_argv, "--handlemissingpvs", &i))
+		goto err;
+
+	/* one of: "convert", "pvmove", "merge", "merge_thin" */
+	if (!add_to_cmd_arr(&cmd_argv, "--polloperation", &i) ||
+	    !add_to_cmd_arr(&cmd_argv, polling_ops[pdlv->type], &i))
+		goto err;
+
+	/* vg/lv name */
+	if (!add_to_cmd_arr(&cmd_argv, pdlv->lvname, &i))
+		goto err;
+
+	/* disable metadata backup */
+	if (!add_to_cmd_arr(&cmd_argv, "-An", &i))
+		goto err;
+
+	/* terminating NULL */
+	if (!add_to_cmd_arr(&cmd_argv, NULL, &i))
+		goto err;
+
+	return cmd_argv;
+err:
+	dm_free(cmd_argv);
+	return NULL;
+}
+
+/* FIXME: in fact exclude should be va list */
+static int copy_env(const char ***cmd_envp, unsigned *i, const char *exclude)
+{
+	const char * const* tmp = (const char * const*) environ;
+
+	if (!tmp)
+		return 0;
+
+	while (*tmp) {
+		if (strncmp(*tmp, exclude, strlen(exclude)) && !add_to_cmd_arr(cmd_envp, *tmp, i))
+			return 0;
+		tmp++;
+	}
+
+	return 1;
+}
+
+const char **cmdenvp_ctr(const struct lvmpolld_lv *pdlv)
+{
+	unsigned i = 0;
+	const char **cmd_envp = dm_malloc(MIN_ARGV_SIZE * sizeof(char *));
+
+	if (!cmd_envp)
+		return NULL;
+
+	/* copy whole environment from lvmpolld, exclude LVM_SYSTEM_DIR if set */
+	if (!copy_env(&cmd_envp, &i, "LVM_SYSTEM_DIR="))
+		goto err;
+
+	/* Add per client LVM_SYSTEM_DIR variable if set */
+	if (*pdlv->lvm_system_dir_env && !add_to_cmd_arr(&cmd_envp, pdlv->lvm_system_dir_env, &i))
+		goto err;
+
+	/* terminating NULL */
+	if (!add_to_cmd_arr(&cmd_envp, NULL, &i))
+		goto err;
+
+	return cmd_envp;
+err:
+	dm_free(cmd_envp);
+	return NULL;
+}
--- a/daemons/lvmpolld/lvmpolld-cmd-utils.h
+++ b/daemons/lvmpolld/lvmpolld-cmd-utils.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _LVM_LVMPOLLD_CMD_UTILS_H
+#define _LVM_LVMPOLLD_CMD_UTILS_H
+
+#include "lvmpolld-data-utils.h"
+
+const char **cmdargv_ctr(const struct lvmpolld_lv *pdlv, const char *lvm_binary, unsigned abort, unsigned handle_missing_pvs);
+const char **cmdenvp_ctr(const struct lvmpolld_lv *pdlv);
+
+const char *polling_op(enum poll_type);
+
+#endif /* _LVM_LVMPOLLD_CMD_UTILS_H */
--- a/daemons/lvmpolld/lvmpolld-common.h
+++ b/daemons/lvmpolld/lvmpolld-common.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2006 Rackable Systems All rights reserved.  
+ * Copyright (C) 2010-2015 Red Hat, Inc. All rights reserved.
 *
 * This file is part of LVM2.
 *
@@ -12,22 +12,20 @@
 * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

-#ifndef _LVM_TIMESTAMP_H
-#define _LVM_TIMESTAMP_H
-
-struct timestamp;
-
-struct timestamp *get_timestamp(void);
-
-/* cmp_timestamp: Compare two timestamps
- * 
- * Return: -1 if t1 is less than t2
- *  	    0 if t1 is equal to t2
- *          1 if t1 is greater than t2
+/*
+ * This file must be included first by every lvmpolld source file.
 */
-int cmp_timestamp(struct timestamp *t1, struct timestamp *t2);
+#ifndef _LVM_LVMPOLLD_COMMON_H
+#define _LVM_LVMPOLLD_COMMON_H

-void destroy_timestamp(struct timestamp *t);
+#define _REENTRANT

-#endif /* _LVM_TIMESTAMP_H */
+#include "tool.h"

+#include "lvmpolld-cmd-utils.h"
+#include "lvmpolld-protocol.h"
+
+#include <assert.h>
+#include <errno.h>
+
+#endif	/* _LVM_LVMPOLLD_COMMON_H */
--- a/daemons/lvmpolld/lvmpolld-core.c
+++ b/daemons/lvmpolld/lvmpolld-core.c
@@ -0,0 +1,985 @@
+/*
+ * Copyright (C) 2014-2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "lvmpolld-common.h"
+
+#include "lvm-version.h"
+#include "daemon-server.h"
+#include "daemon-log.h"
+
+#include <getopt.h>
+#include <poll.h>
+#include <wait.h>
+
+#define LVMPOLLD_SOCKET DEFAULT_RUN_DIR "/lvmpolld.socket"
+
+#define PD_LOG_PREFIX "LVMPOLLD"
+#define LVM2_LOG_PREFIX "\tLVPOLL"
+
+/* predefined reason for response = "failed" case */
+#define REASON_REQ_NOT_IMPLEMENTED "request not implemented"
+#define REASON_MISSING_LVID "request requires lvid set"
+#define REASON_MISSING_LVNAME "request requires lvname set"
+#define REASON_MISSING_VGNAME "request requires vgname set"
+#define REASON_POLLING_FAILED "polling of lvm command failed"
+#define REASON_ILLEGAL_ABORT_REQUEST "abort only supported with PVMOVE polling operation"
+#define REASON_DIFFERENT_OPERATION_IN_PROGRESS "Different operation on LV already in progress"
+#define REASON_INVALID_INTERVAL "request requires interval set"
+#define REASON_ENOMEM "not enough memory"
+
+struct lvmpolld_state {
+	daemon_idle *idle;
+	log_state *log;
+	const char *log_config;
+	const char *lvm_binary;
+
+	struct lvmpolld_store *id_to_pdlv_abort;
+	struct lvmpolld_store *id_to_pdlv_poll;
+};
+
+static pthread_key_t key;
+
+static const char *_strerror_r(int errnum, struct lvmpolld_thread_data *data)
+{
+#ifdef _GNU_SOURCE
+	return strerror_r(errnum, data->buf, sizeof(data->buf)); /* never returns NULL */
+#elif (_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600)
+	return strerror_r(errnum, data->buf, sizeof(data->buf)) ? "" : data->buf;
+#else
+#	warning "Can't decide proper strerror_r implementation. lvmpolld will not issue specific system error messages"
+	return "";
+#endif
+}
+
+static void _usage(const char *prog, FILE *file)
+{
+	fprintf(file, "Usage:\n"
+		"%s [-V] [-h] [-f] [-l {all|wire|debug}] [-s path] [-B path] [-p path] [-t secs]\n"
+		"%s --dump [-s path]\n"
+		"   -V|--version     Show version info\n"
+		"   -h|--help        Show this help information\n"
+		"   -f|--foreground  Don't fork, run in the foreground\n"
+		"   --dump           Dump full lvmpolld state\n"
+		"   -l|--log         Logging message level (-l {all|wire|debug})\n"
+		"   -p|--pidfile     Set path to the pidfile\n"
+		"   -s|--socket      Set path to the communication socket\n"
+		"   -B|--binary      Path to lvm2 binary\n"
+		"   -t|--timeout     Time to wait in seconds before shutdown on idle (missing or 0 = infinite)\n\n", prog, prog);
+}
+
+static int _init(struct daemon_state *s)
+{
+	struct lvmpolld_state *ls = s->private;
+	ls->log = s->log;
+
+	if (!daemon_log_parse(ls->log, DAEMON_LOG_OUTLET_STDERR, ls->log_config, 1))
+		return 0;
+
+	if (pthread_key_create(&key, lvmpolld_thread_data_destroy)) {
+		FATAL(ls, "%s: %s", PD_LOG_PREFIX, "Failed to create pthread key");
+		return 0;
+	}
+
+	ls->id_to_pdlv_poll = pdst_init("polling");
+	ls->id_to_pdlv_abort = pdst_init("abort");
+
+	if (!ls->id_to_pdlv_poll || !ls->id_to_pdlv_abort) {
+		FATAL(ls, "%s: %s", PD_LOG_PREFIX, "Failed to allocate internal data structures");
+		return 0;
+	}
+
+	ls->lvm_binary = ls->lvm_binary ?: LVM_PATH;
+
+	if (access(ls->lvm_binary, X_OK)) {
+		FATAL(ls, "%s: %s %s", PD_LOG_PREFIX, "Execute access rights denied on", ls->lvm_binary);
+		return 0;
+	}
+
+	if (ls->idle)
+		ls->idle->is_idle = 1;
+
+	return 1;
+}
+
+static void _lvmpolld_stores_lock(struct lvmpolld_state *ls)
+{
+	pdst_lock(ls->id_to_pdlv_poll);
+	pdst_lock(ls->id_to_pdlv_abort);
+}
+
+static void _lvmpolld_stores_unlock(struct lvmpolld_state *ls)
+{
+	pdst_unlock(ls->id_to_pdlv_abort);
+	pdst_unlock(ls->id_to_pdlv_poll);
+}
+
+static void _lvmpolld_global_lock(struct lvmpolld_state *ls)
+{
+	_lvmpolld_stores_lock(ls);
+
+	pdst_locked_lock_all_pdlvs(ls->id_to_pdlv_poll);
+	pdst_locked_lock_all_pdlvs(ls->id_to_pdlv_abort);
+}
+
+static void _lvmpolld_global_unlock(struct lvmpolld_state *ls)
+{
+	pdst_locked_unlock_all_pdlvs(ls->id_to_pdlv_abort);
+	pdst_locked_unlock_all_pdlvs(ls->id_to_pdlv_poll);
+
+	_lvmpolld_stores_unlock(ls);
+}
+
+static int _fini(struct daemon_state *s)
+{
+	int done;
+	const struct timespec t = { .tv_nsec = 250000000 }; /* .25 sec */
+	struct lvmpolld_state *ls = s->private;
+
+	DEBUGLOG(s, "fini");
+
+	DEBUGLOG(s, "sending cancel requests");
+
+	_lvmpolld_global_lock(ls);
+	pdst_locked_send_cancel(ls->id_to_pdlv_poll);
+	pdst_locked_send_cancel(ls->id_to_pdlv_abort);
+	_lvmpolld_global_unlock(ls);
+
+	DEBUGLOG(s, "waiting for background threads to finish");
+
+	while(1) {
+		_lvmpolld_stores_lock(ls);
+		done = !pdst_locked_get_active_count(ls->id_to_pdlv_poll) &&
+		       !pdst_locked_get_active_count(ls->id_to_pdlv_abort);
+		_lvmpolld_stores_unlock(ls);
+		if (done)
+			break;
+		nanosleep(&t, NULL);
+	}
+
+	DEBUGLOG(s, "destroying internal data structures");
+
+	_lvmpolld_stores_lock(ls);
+	pdst_locked_destroy_all_pdlvs(ls->id_to_pdlv_poll);
+	pdst_locked_destroy_all_pdlvs(ls->id_to_pdlv_abort);
+	_lvmpolld_stores_unlock(ls);
+
+	pdst_destroy(ls->id_to_pdlv_poll);
+	pdst_destroy(ls->id_to_pdlv_abort);
+
+	pthread_key_delete(key);
+
+	return 1;
+}
+
+static response reply(const char *res, const char *reason)
+{
+	return daemon_reply_simple(res, "reason = %s", reason, NULL);
+}
+
+static int read_single_line(struct lvmpolld_thread_data *data, int err)
+{
+	/* non-zero err reads from data->ferr, zero reads from data->fout */
+	ssize_t len = getline(&data->line, &data->line_size, err ? data->ferr : data->fout);
+
+	if (len > 0 && data->line[len - 1] == '\n')
+		data->line[len - 1] = '\0'; /* hand the line back without its newline */
+	return len > 0; /* 1 when a line was read into data->line, 0 otherwise */
+}
+
+static void update_idle_state(struct lvmpolld_state *ls)
+{
+	if (!ls->idle)
+		return;
+
+	_lvmpolld_stores_lock(ls);
+
+	ls->idle->is_idle = !pdst_locked_get_active_count(ls->id_to_pdlv_poll) &&
+			    !pdst_locked_get_active_count(ls->id_to_pdlv_abort);
+
+	_lvmpolld_stores_unlock(ls);
+
+	DEBUGLOG(ls, "%s: %s %s%s", PD_LOG_PREFIX, "daemon is", ls->idle->is_idle ? "" : "not ", "idle");
+}
+
+/* make this configurable */
+#define MAX_TIMEOUT 2
+
+static int poll_for_output(struct lvmpolld_lv *pdlv, struct lvmpolld_thread_data *data)
+{
+	int ch_stat, r, err = 1, fds_count = 2, timeout = 0;
+	pid_t pid;
+	struct lvmpolld_cmd_stat cmd_state = { .retcode = -1, .signal = 0 };
+	struct pollfd fds[] = { { .fd = data->outpipe[0], .events = POLLIN },
+				{ .fd = data->errpipe[0], .events = POLLIN } };
+
+	if (!(data->fout = fdopen(data->outpipe[0], "r")) || !(data->ferr = fdopen(data->errpipe[0], "r"))) {
+		ERROR(pdlv->ls, "%s: %s: (%d) %s", PD_LOG_PREFIX, "failed to open file stream",
+		      errno, _strerror_r(errno, data));
+		goto out;
+	}
+
+	while (1) {
+		do {
+			r = poll(fds, 2, pdlv_get_timeout(pdlv) * 1000);
+		} while (r < 0 && errno == EINTR);
+
+		DEBUGLOG(pdlv->ls, "%s: %s %d", PD_LOG_PREFIX, "poll() returned", r);
+		if (r < 0) {
+			ERROR(pdlv->ls, "%s: %s (PID %d) failed: (%d) %s",
+			      PD_LOG_PREFIX, "poll() for LVM2 cmd", pdlv->cmd_pid,
+			      errno, _strerror_r(errno, data));
+			goto out;
+		} else if (!r) {
+			timeout++;
+
+			WARN(pdlv->ls, "%s: %s (PID %d) %s", PD_LOG_PREFIX,
+			     "polling for output of the lvm cmd", pdlv->cmd_pid,
+			     "has timed out");
+
+			if (timeout > MAX_TIMEOUT) {
+				ERROR(pdlv->ls, "%s: %s (PID %d) (no output for %d seconds)",
+				      PD_LOG_PREFIX,
+				      "LVM2 cmd is unresponsive too long",
+				      pdlv->cmd_pid,
+				      timeout * pdlv_get_timeout(pdlv));
+				goto out;
+			}
+
+			continue; /* while(1) */
+		}
+
+		timeout = 0; /* got some event: restart counting consecutive timeouts */
+
+		/* handle the command's STDOUT */
+		if (fds[0].revents & POLLIN) {
+			DEBUGLOG(pdlv->ls, "%s: %s", PD_LOG_PREFIX, "caught input data in STDOUT");
+
+			if (read_single_line(data, 0)) /* may block indef. anyway; must not live in assert() (NDEBUG) */
+				INFO(pdlv->ls, "%s: PID %d: %s: '%s'", LVM2_LOG_PREFIX,
+				     pdlv->cmd_pid, "STDOUT", data->line);
+		} else if (fds[0].revents) {
+			if (fds[0].revents & POLLHUP)
+				DEBUGLOG(pdlv->ls, "%s: %s", PD_LOG_PREFIX, "caught POLLHUP");
+			else
+				WARN(pdlv->ls, "%s: %s", PD_LOG_PREFIX, "poll for command's STDOUT failed");
+
+			fds[0].fd = -1; /* negative fd: poll() ignores this entry from now on */
+			fds_count--;
+		}
+
+		/* handle the command's STDERR */
+		if (fds[1].revents & POLLIN) {
+			DEBUGLOG(pdlv->ls, "%s: %s", PD_LOG_PREFIX,
+				 "caught input data in STDERR");
+
+			if (read_single_line(data, 1)) /* may block indef. anyway; must not live in assert() (NDEBUG) */
+				INFO(pdlv->ls, "%s: PID %d: %s: '%s'", LVM2_LOG_PREFIX,
+				     pdlv->cmd_pid, "STDERR", data->line);
+		} else if (fds[1].revents) {
+			if (fds[1].revents & POLLHUP)
+				DEBUGLOG(pdlv->ls, "%s: %s", PD_LOG_PREFIX, "caught err POLLHUP");
+			else
+				WARN(pdlv->ls, "%s: %s", PD_LOG_PREFIX, "poll for command's STDERR failed");
+
+			fds[1].fd = -1; /* negative fd: poll() ignores this entry from now on */
+			fds_count--;
+		}
+
+		do {
+			/*
+			 * fds_count == 0 means polling reached EOF
+			 * or received error on both descriptors.
+			 * In such case, just wait for command to finish
+			 */
+			pid = waitpid(pdlv->cmd_pid, &ch_stat, fds_count ? WNOHANG : 0);
+		} while (pid < 0 && errno == EINTR);
+
+		if (pid) {
+			if (pid < 0) {
+				ERROR(pdlv->ls, "%s: %s (PID %d) failed: (%d) %s",
+				      PD_LOG_PREFIX, "waitpid() for lvm2 cmd",
+				      pdlv->cmd_pid, errno,
+				      _strerror_r(errno, data));
+				goto out;
+			}
+			DEBUGLOG(pdlv->ls, "%s: %s", PD_LOG_PREFIX, "child exited");
+			break;
+		}
+	} /* while(1) */
+
+	DEBUGLOG(pdlv->ls, "%s: %s", PD_LOG_PREFIX, "about to collect remaining lines");
+	if (fds[0].fd >= 0)
+		while (read_single_line(data, 0)) {
+			assert(r > 0);
+			INFO(pdlv->ls, "%s: PID %d: %s: %s", LVM2_LOG_PREFIX, pdlv->cmd_pid, "STDOUT", data->line);
+		}
+	if (fds[1].fd >= 0)
+		while (read_single_line(data, 1)) {
+			assert(r > 0);
+			INFO(pdlv->ls, "%s: PID %d: %s: %s", LVM2_LOG_PREFIX, pdlv->cmd_pid, "STDERR", data->line);
+		}
+
+	if (WIFEXITED(ch_stat)) {
+		INFO(pdlv->ls, "%s: %s (PID %d) %s (%d)", PD_LOG_PREFIX,
+		     "lvm2 cmd", pdlv->cmd_pid, "exited with", WEXITSTATUS(ch_stat));
+		cmd_state.retcode = WEXITSTATUS(ch_stat);
+	} else if (WIFSIGNALED(ch_stat)) {
+		WARN(pdlv->ls, "%s: %s (PID %d) %s (%d)", PD_LOG_PREFIX,
+		     "lvm2 cmd", pdlv->cmd_pid, "got terminated by signal",
+		     WTERMSIG(ch_stat));
+		cmd_state.signal = WTERMSIG(ch_stat);
+	}
+
+	err = 0;
+out:
+	if (!err) /* the command state is published only when the child was reaped */
+		pdlv_set_cmd_state(pdlv, &cmd_state);
+
+	return err;
+}
+
+static void debug_print(struct lvmpolld_state *ls, const char * const* ptr)
+{
+	const char * const* tmp = ptr;
+
+	if (!tmp)
+		return;
+
+	while (*tmp) {
+		DEBUGLOG(ls, "%s: %s", PD_LOG_PREFIX, *tmp);
+		tmp++;
+	}
+}
+
+static void *fork_and_poll(void *args)
+{
+	int outfd, errfd, state;
+	struct lvmpolld_thread_data *data;
+	pid_t r;
+
+	int error = 1;
+	struct lvmpolld_lv *pdlv = (struct lvmpolld_lv *) args;
+	struct lvmpolld_state *ls = pdlv->ls;
+
+	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &state);
+	data = lvmpolld_thread_data_constructor(pdlv);
+	pthread_setspecific(key, data);
+	pthread_setcancelstate(state, &state);
+
+	if (!data) {
+		ERROR(ls, "%s: %s", PD_LOG_PREFIX, "Failed to initialize per-thread data");
+		goto err;
+	}
+
+	DEBUGLOG(ls, "%s: %s", PD_LOG_PREFIX, "cmd line arguments:");
+	debug_print(ls, pdlv->cmdargv);
+	DEBUGLOG(ls, "%s: %s", PD_LOG_PREFIX, "---end---");
+
+	DEBUGLOG(ls, "%s: %s", PD_LOG_PREFIX, "cmd environment variables:");
+	debug_print(ls, pdlv->cmdenvp);
+	DEBUGLOG(ls, "%s: %s", PD_LOG_PREFIX, "---end---");
+
+	outfd = data->outpipe[1];
+	errfd = data->errpipe[1];
+
+	r = fork();
+	if (!r) {
+		/* child */
+		/* !!! Do not touch any posix thread primitives !!! */
+
+		if ((dup2(outfd, STDOUT_FILENO ) != STDOUT_FILENO) ||
+		    (dup2(errfd, STDERR_FILENO ) != STDERR_FILENO))
+			_exit(LVMPD_RET_DUP_FAILED);
+
+		execve(*(pdlv->cmdargv), (char *const *)pdlv->cmdargv, (char *const *)pdlv->cmdenvp);
+
+		_exit(LVMPD_RET_EXC_FAILED);
+	} else {
+		/* parent */
+		if (r == -1) {
+			ERROR(ls, "%s: %s: (%d) %s", PD_LOG_PREFIX, "fork failed",
+			      errno, _strerror_r(errno, data));
+			goto err;
+		}
+
+		INFO(ls, "%s: LVM2 cmd \"%s\" (PID: %d)", PD_LOG_PREFIX, *(pdlv->cmdargv), r);
+
+		pdlv->cmd_pid = r;
+
+		/* failure to close write end of any pipe will result in broken polling */
+		if (close(data->outpipe[1])) {
+			ERROR(ls, "%s: %s: (%d) %s", PD_LOG_PREFIX, "failed to close write end of pipe",
+			      errno, _strerror_r(errno, data));
+			goto err;
+		}
+		data->outpipe[1] = -1;
+
+		if (close(data->errpipe[1])) {
+			ERROR(ls, "%s: %s: (%d) %s", PD_LOG_PREFIX, "failed to close write end of err pipe",
+			      errno, _strerror_r(errno, data));
+			goto err;
+		}
+		data->errpipe[1] = -1;
+
+		error = poll_for_output(pdlv, data);
+		DEBUGLOG(ls, "%s: %s", PD_LOG_PREFIX, "polling for lvpoll output has finished");
+	}
+
+err:
+	r = 0;
+
+	pdst_lock(pdlv->pdst);
+
+	if (error) {
+		/* last reader is responsible for pdlv cleanup */
+		r = pdlv->cmd_pid;
+		pdlv_set_error(pdlv, 1);
+	}
+
+	pdlv_set_polling_finished(pdlv, 1);
+	if (data)
+		data->pdlv = NULL;
+
+	pdst_locked_dec(pdlv->pdst);
+
+	pdst_unlock(pdlv->pdst);
+
+	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &state);
+	lvmpolld_thread_data_destroy(data);
+	pthread_setspecific(key, NULL);
+	pthread_setcancelstate(state, &state);
+
+	update_idle_state(ls);
+
+	/*
+	 * This is unfortunate case where we
+	 * know nothing about state of lvm cmd and
+	 * (eventually) ongoing progress.
+	 *
+	 * harvest zombies
+	 */
+	if (r)
+		while(waitpid(r, NULL, 0) < 0 && errno == EINTR);
+
+	return NULL;
+}
+
+static response progress_info(client_handle h, struct lvmpolld_state *ls, request req)
+{
+	char *id;
+	struct lvmpolld_lv *pdlv;
+	struct lvmpolld_store *pdst;
+	struct lvmpolld_lv_state st;
+	response r;
+	const char *lvid = daemon_request_str(req, LVMPD_PARM_LVID, NULL);
+	const char *sysdir = daemon_request_str(req, LVMPD_PARM_SYSDIR, NULL);
+	unsigned abort_polling = daemon_request_int(req, LVMPD_PARM_ABORT, 0);
+
+	if (!lvid)
+		return reply(LVMPD_RESP_FAILED, REASON_MISSING_LVID);
+
+	id = construct_id(sysdir, lvid);
+	if (!id) {
+		ERROR(ls, "%s: %s", PD_LOG_PREFIX, "progress_info request failed to construct ID.");
+		return reply(LVMPD_RESP_FAILED, REASON_ENOMEM);
+	}
+
+	DEBUGLOG(ls, "%s: %s: %s", PD_LOG_PREFIX, "ID", id);
+
+	pdst = abort_polling ? ls->id_to_pdlv_abort : ls->id_to_pdlv_poll;
+
+	pdst_lock(pdst);
+
+	pdlv = pdst_locked_lookup(pdst, id);
+	if (pdlv) {
+		/*
+		 * with store lock held, I'm the only reader accessing the pdlv
+		 */
+		st = pdlv_get_status(pdlv);
+
+		if (st.error || st.polling_finished) {
+			INFO(ls, "%s: %s %s", PD_LOG_PREFIX,
+			     "Polling finished. Removing related data structure for LV",
+			     lvid);
+			pdst_locked_remove(pdst, id);
+			pdlv_destroy(pdlv);
+		}
+	}
+	/* pdlv must not be dereferenced from now on */
+
+	pdst_unlock(pdst);
+
+	dm_free(id);
+
+	if (pdlv) {
+		if (st.error)
+			return reply(LVMPD_RESP_FAILED, REASON_POLLING_FAILED);
+
+		if (st.polling_finished)
+			r = daemon_reply_simple(LVMPD_RESP_FINISHED,
+						"reason = %s", st.cmd_state.signal ? LVMPD_REAS_SIGNAL : LVMPD_REAS_RETCODE,
+						LVMPD_PARM_VALUE " = %d", (int64_t)(st.cmd_state.signal ?: st.cmd_state.retcode),
+						NULL);
+		else
+			r = daemon_reply_simple(LVMPD_RESP_IN_PROGRESS, NULL);
+	}
+	else
+		r = daemon_reply_simple(LVMPD_RESP_NOT_FOUND, NULL);
+
+	return r;
+}
+
+static struct lvmpolld_lv *construct_pdlv(request req, struct lvmpolld_state *ls,
+				     struct lvmpolld_store *pdst,
+				     const char *interval, const char *id,
+				     const char *vgname, const char *lvname,
+				     const char *sysdir, enum poll_type type,
+				     unsigned abort_polling, unsigned uinterval)
+{
+	const char **cmdargv, **cmdenvp;
+	struct lvmpolld_lv *pdlv;
+	unsigned handle_missing_pvs = daemon_request_int(req, LVMPD_PARM_HANDLE_MISSING_PVS, 0);
+
+	pdlv = pdlv_create(ls, id, vgname, lvname, sysdir, type,
+			   interval, uinterval, pdst);
+
+	if (!pdlv) {
+		ERROR(ls, "%s: %s", PD_LOG_PREFIX, "failed to create internal LV data structure.");
+		return NULL;
+	}
+
+	cmdargv = cmdargv_ctr(pdlv, pdlv->ls->lvm_binary, abort_polling, handle_missing_pvs);
+	if (!cmdargv) {
+		pdlv_destroy(pdlv);
+		ERROR(ls, "%s: %s", PD_LOG_PREFIX, "failed to construct cmd arguments for lvpoll command");
+		return NULL;
+	}
+
+	pdlv->cmdargv = cmdargv;
+
+	cmdenvp = cmdenvp_ctr(pdlv);
+	if (!cmdenvp) {
+		pdlv_destroy(pdlv);
+		ERROR(ls, "%s: %s", PD_LOG_PREFIX, "failed to construct cmd environment for lvpoll command");
+		return NULL;
+	}
+
+	pdlv->cmdenvp = cmdenvp;
+
+	return pdlv;
+}
+
+static int spawn_detached_thread(struct lvmpolld_lv *pdlv)
+{
+	int r;
+	pthread_attr_t attr;
+
+	if (pthread_attr_init(&attr))
+		return 0; /* attr is unusable, nothing to destroy */
+	/* create the thread only when the detached attribute was really applied */
+	r = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) ||
+	    pthread_create(&pdlv->tid, &attr, fork_and_poll, (void *)pdlv);
+	pthread_attr_destroy(&attr);
+
+	return !r; /* 1 = thread running fork_and_poll(pdlv) was started, 0 = failure */
+}
+
+static response poll_init(client_handle h, struct lvmpolld_state *ls, request req, enum poll_type type)
+{
+	char *id;
+	struct lvmpolld_lv *pdlv;
+	struct lvmpolld_store *pdst;
+	unsigned uinterval;
+
+	const char *interval = daemon_request_str(req, LVMPD_PARM_INTERVAL, NULL);
+	const char *lvid = daemon_request_str(req, LVMPD_PARM_LVID, NULL);
+	const char *lvname = daemon_request_str(req, LVMPD_PARM_LVNAME, NULL);
+	const char *vgname = daemon_request_str(req, LVMPD_PARM_VGNAME, NULL);
+	const char *sysdir = daemon_request_str(req, LVMPD_PARM_SYSDIR, NULL);
+	unsigned abort_polling = daemon_request_int(req, LVMPD_PARM_ABORT, 0);
+
+	assert(type < POLL_TYPE_MAX);
+
+	if (abort_polling && type != PVMOVE)
+		return reply(LVMPD_RESP_EINVAL, REASON_ILLEGAL_ABORT_REQUEST);
+
+	if (!interval || strpbrk(interval, "-") || sscanf(interval, "%u", &uinterval) != 1)
+		return reply(LVMPD_RESP_EINVAL, REASON_INVALID_INTERVAL);
+
+	if (!lvname)
+		return reply(LVMPD_RESP_FAILED, REASON_MISSING_LVNAME);
+
+	if (!lvid)
+		return reply(LVMPD_RESP_FAILED, REASON_MISSING_LVID);
+
+	if (!vgname)
+		return reply(LVMPD_RESP_FAILED, REASON_MISSING_VGNAME);
+
+	id = construct_id(sysdir, lvid);
+	if (!id) {
+		ERROR(ls, "%s: %s", PD_LOG_PREFIX, "poll_init request failed to construct ID.");
+		return reply(LVMPD_RESP_FAILED, REASON_ENOMEM);
+	}
+
+	DEBUGLOG(ls, "%s: %s=%s", PD_LOG_PREFIX, "ID", id);
+
+	pdst = abort_polling ? ls->id_to_pdlv_abort : ls->id_to_pdlv_poll;
+
+	pdst_lock(pdst);
+
+	pdlv = pdst_locked_lookup(pdst, id);
+	if (pdlv && pdlv_get_polling_finished(pdlv)) {
+		WARN(ls, "%s: %s %s", PD_LOG_PREFIX, "Force removal of uncollected info for LV",
+			 lvid);
+		/* 
+		 * lvmpolld has to remove uncollected results in this case.
+		 * otherwise it would have to refuse request for new polling
+		 * lv with same id.
+		 */
+		pdst_locked_remove(pdst, id);
+		pdlv_destroy(pdlv);
+		pdlv = NULL;
+	}
+
+	if (pdlv) {
+		if (!pdlv_is_type(pdlv, type)) {
+			pdst_unlock(pdst);
+			ERROR(ls, "%s: %s '%s': expected: %s, requested: %s",
+			      PD_LOG_PREFIX, "poll operation type mismatch on LV identified by",
+			      id,
+			      polling_op(pdlv_get_type(pdlv)), polling_op(type));
+			dm_free(id);
+			return reply(LVMPD_RESP_EINVAL,
+				     REASON_DIFFERENT_OPERATION_IN_PROGRESS);
+		}
+		pdlv->init_rq_count++; /* safe. protected by store lock */
+	} else {
+		pdlv = construct_pdlv(req, ls, pdst, interval, id, vgname,
+				      lvname, sysdir, type, abort_polling, 2 * uinterval);
+		if (!pdlv) {
+			pdst_unlock(pdst);
+			dm_free(id);
+			return reply(LVMPD_RESP_FAILED, REASON_ENOMEM);
+		}
+		if (!pdst_locked_insert(pdst, id, pdlv)) {
+			pdlv_destroy(pdlv);
+			pdst_unlock(pdst);
+			ERROR(ls, "%s: %s", PD_LOG_PREFIX, "couldn't store internal LV data structure");
+			dm_free(id);
+			return reply(LVMPD_RESP_FAILED, REASON_ENOMEM);
+		}
+		if (!spawn_detached_thread(pdlv)) {
+			ERROR(ls, "%s: %s", PD_LOG_PREFIX, "failed to spawn detached monitoring thread");
+			pdst_locked_remove(pdst, id);
+			pdlv_destroy(pdlv);
+			pdst_unlock(pdst);
+			dm_free(id);
+			return reply(LVMPD_RESP_FAILED, REASON_ENOMEM);
+		}
+
+		pdst_locked_inc(pdst);
+		if (ls->idle)
+			ls->idle->is_idle = 0;
+	}
+
+	pdst_unlock(pdst);
+
+	dm_free(id);
+
+	return daemon_reply_simple(LVMPD_RESP_OK, NULL);
+}
+
+static response dump_state(client_handle h, struct lvmpolld_state *ls, request r)
+{
+	response res = { 0 };
+	struct buffer *b = &res.buffer;
+
+	buffer_init(b);
+
+	_lvmpolld_global_lock(ls);
+
+	buffer_append(b, "# Registered polling operations\n\n");
+	buffer_append(b, "poll {\n");
+	pdst_locked_dump(ls->id_to_pdlv_poll, b);
+	buffer_append(b, "}\n\n");
+
+	buffer_append(b, "# Registered abort operations\n\n");
+	buffer_append(b, "abort {\n");
+	pdst_locked_dump(ls->id_to_pdlv_abort, b);
+	buffer_append(b, "}");
+
+	_lvmpolld_global_unlock(ls);
+
+	return res;
+}
+
+static response _handler(struct daemon_state s, client_handle h, request r)
+{
+	struct lvmpolld_state *ls = s.private;
+	const char *rq = daemon_request_str(r, "request", "NONE");
+
+	if (!strcmp(rq, LVMPD_REQ_PVMOVE))
+		return poll_init(h, ls, r, PVMOVE);
+	else if (!strcmp(rq, LVMPD_REQ_CONVERT))
+		return poll_init(h, ls, r, CONVERT);
+	else if (!strcmp(rq, LVMPD_REQ_MERGE))
+		return poll_init(h, ls, r, MERGE);
+	else if (!strcmp(rq, LVMPD_REQ_MERGE_THIN))
+		return poll_init(h, ls, r, MERGE_THIN);
+	else if (!strcmp(rq, LVMPD_REQ_PROGRESS))
+		return progress_info(h, ls, r);
+	else if (!strcmp(rq, LVMPD_REQ_DUMP))
+		return dump_state(h, ls, r);
+	else
+		return reply(LVMPD_RESP_EINVAL, REASON_REQ_NOT_IMPLEMENTED);
+}
+
+static int process_timeout_arg(const char *str, unsigned *max_timeouts)
+{
+	char *endptr;
+	unsigned long l;
+
+	errno = 0; /* strtoul() signals overflow only through errno */
+	l = strtoul(str, &endptr, 10);
+	/* reject "", trailing junk, '-' (strtoul() silently negates) and too large values */
+	if (errno || endptr == str || *endptr || strchr(str, '-') || l >= UINT_MAX)
+		return 0;
+	*max_timeouts = (unsigned) l; /* written only on success */
+
+	return 1;
+}
+
+/* Client functionality */
+typedef int (*action_fn_t) (void *args);
+
+struct log_line_baton {
+	const char *prefix;
+};
+
+daemon_handle _lvmpolld = { .error = 0 };
+
+static daemon_handle _lvmpolld_open(const char *socket)
+{
+	daemon_info lvmpolld_info = {
+		.path = "lvmpolld",
+		.socket = socket ?: DEFAULT_RUN_DIR "/lvmpolld.socket",
+		.protocol = LVMPOLLD_PROTOCOL,
+		.protocol_version = LVMPOLLD_PROTOCOL_VERSION
+	};
+
+	return daemon_open(lvmpolld_info);
+}
+
+static void _log_line(const char *line, void *baton) {
+	struct log_line_baton *b = baton;
+	fprintf(stdout, "%s%s\n", b->prefix, line);
+}
+
+static int printout_raw_response(const char *prefix, const char *msg)
+{
+	struct log_line_baton b = { .prefix = prefix };
+	char *buf;
+	char *pos;
+
+	buf = dm_strdup(msg);
+	pos = buf;
+
+	if (!buf)
+		return 0;
+
+	while (pos) {
+		char *next = strchr(pos, '\n');
+		if (next)
+			*next = 0;
+		_log_line(pos, &b);
+		pos = next ? next + 1 : 0;
+	}
+	dm_free(buf);
+
+	return 1;
+}
+
+/* place all action implementations below */
+
+static int action_dump(void *args __attribute__((unused)))
+{
+	daemon_request req;
+	daemon_reply repl;
+	int r = 0;
+
+	req = daemon_request_make(LVMPD_REQ_DUMP);
+	if (!req.cft) {
+		fprintf(stderr, "Failed to create lvmpolld " LVMPD_REQ_DUMP " request.\n");
+		goto out_req;
+	}
+
+	repl = daemon_send(_lvmpolld, req);
+	if (repl.error) {
+		fprintf(stderr, "Failed to send a request or receive response.\n");
+		goto  out_rep;
+	}
+
+	/*
+	 * This is dumb copy & paste from libdaemon log routines.
+	 */
+	if (!printout_raw_response("  ", repl.buffer.mem)) {
+		fprintf(stderr, "Failed to print out the response.\n");
+		goto  out_rep;
+	}
+
+	r = 1;
+
+out_rep:
+	daemon_reply_destroy(repl);
+out_req:
+	daemon_request_destroy(req);
+
+	return r;
+}
+
+enum action_index {
+	ACTION_DUMP = 0,
+	ACTION_MAX /* keep at the end */
+};
+
+static const action_fn_t actions[ACTION_MAX] = { [ACTION_DUMP] = action_dump };
+
+static int _make_action(enum action_index idx, void *args)
+{
+	return idx < ACTION_MAX ? actions[idx](args) : 0;
+}
+
+static int _lvmpolld_client(const char *socket, unsigned action)
+{
+	int r;
+
+	_lvmpolld = _lvmpolld_open(socket);
+
+	if (_lvmpolld.error || _lvmpolld.socket_fd < 0) {
+		fprintf(stderr, "Failed to establish connection with lvmpolld.\n");
+		return EXIT_FAILURE; /* an exit status like the final return, not a boolean */
+	}
+
+	r = _make_action(action, NULL); /* non-zero means the action succeeded */
+
+	daemon_close(_lvmpolld);
+
+	return r ? EXIT_SUCCESS : EXIT_FAILURE;
+}
+
+static int action_idx = ACTION_MAX;
+static struct option long_options[] = {
+	/* Have actions always at the beginning of the array. */
+	{"dump",	no_argument,		&action_idx,	ACTION_DUMP }, /* or an option_index ? */
+
+	/* other options */
+	{"binary",	required_argument,	0,		'B' },
+	{"foreground",	no_argument,		0,		'f' },
+	{"help",	no_argument,		0,		'h' },
+	{"log",		required_argument,	0,		'l' },
+	{"pidfile",	required_argument,	0,		'p' },
+	{"socket",	required_argument,	0,		's' },
+	{"timeout",	required_argument,	0,		't' },
+	{"version",	no_argument,		0,		'V' },
+	{0,		0,			0,		0 }
+};
+
+int main(int argc, char *argv[])
+{
+	int opt;
+	int option_index = 0;
+	int client = 0, server = 0; /* set by client-only (actions) resp. server-only options */
+	unsigned action = ACTION_MAX;
+	struct timeval timeout;
+	daemon_idle di = { .ptimeout = &timeout };
+	struct lvmpolld_state ls = { .log_config = "" };
+	daemon_state s = {
+		.daemon_fini = _fini,
+		.daemon_init = _init,
+		.handler = _handler,
+		.name = "lvmpolld",
+		.pidfile = getenv("LVM_LVMPOLLD_PIDFILE") ?: LVMPOLLD_PIDFILE,
+		.private = &ls,
+		.protocol = LVMPOLLD_PROTOCOL,
+		.protocol_version = LVMPOLLD_PROTOCOL_VERSION,
+		.socket_path = getenv("LVM_LVMPOLLD_SOCKET") ?: LVMPOLLD_SOCKET,
+	};
+
+	while ((opt = getopt_long(argc, argv, "fhVl:p:s:B:t:", long_options, &option_index)) != -1) {
+		switch (opt) {
+		case 0 : /* a flag-style long option stored its action into action_idx */
+			if (action < ACTION_MAX) {
+				fprintf(stderr, "Can't perform more actions. Action already requested: %s\n",
+					long_options[action].name);
+				_usage(argv[0], stderr);
+				exit(EXIT_FAILURE);
+			}
+			action = action_idx;
+			client = 1;
+			break;
+		case '?':
+			_usage(argv[0], stderr);
+			exit(EXIT_FAILURE);
+		case 'B': /* --binary */
+			ls.lvm_binary = optarg;
+			server = 1;
+			break;
+		case 'V': /* --version */
+			printf("lvmpolld version: " LVM_VERSION "\n");
+			exit(EXIT_SUCCESS);
+		case 'f': /* --foreground */
+			s.foreground = 1;
+			server = 1;
+			break;
+		case 'h': /* --help */
+			_usage(argv[0], stdout);
+			exit(EXIT_SUCCESS);
+		case 'l': /* --log */
+			ls.log_config = optarg;
+			server = 1;
+			break;
+		case 'p': /* --pidfile */
+			s.pidfile = optarg;
+			server = 1;
+			break;
+		case 's': /* --socket */
+			s.socket_path = optarg;
+			break;
+		case 't': /* --timeout in seconds */
+			if (!process_timeout_arg(optarg, &di.max_timeouts)) {
+				fprintf(stderr, "Invalid value of timeout parameter.\n");
+				exit(EXIT_FAILURE);
+			}
+			/* 0 equals to wait indefinitely */
+			if (di.max_timeouts)
+				s.idle = ls.idle = &di;
+			server = 1;
+			break;
+		}
+	}
+
+	if (client && server) {
+		fprintf(stderr, "Invalid combination of client and server parameters.\n\n");
+		_usage(argv[0], stderr); /* error path: usage goes to stderr like the other failures */
+		exit(EXIT_FAILURE);
+	}
+
+	if (client)
+		return _lvmpolld_client(s.socket_path, action);
+
+	/* Server */
+	daemon_start(s);
+
+	return EXIT_SUCCESS;
+}
--- a/daemons/lvmpolld/lvmpolld-data-utils.c
+++ b/daemons/lvmpolld/lvmpolld-data-utils.c
@@ -0,0 +1,391 @@
+/*
+ * Copyright (C) 2014-2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "lvmpolld-common.h"
+
+#include "config-util.h"
+
+#include <fcntl.h>
+#include <signal.h>
+
+static char *_construct_full_lvname(const char *vgname, const char *lvname)
+{
+	/* "vg/lv": both names plus the '/' separator and the terminating NUL */
+	const size_t len = strlen(vgname) + strlen(lvname) + 2;
+	char *full = (char *) dm_malloc(len * sizeof(char));
+
+	if (!full)
+		return NULL;
+
+	/* a negative dm_snprintf() result is treated as failure */
+	if (dm_snprintf(full, len, "%s/%s", vgname, lvname) >= 0)
+		return full;
+
+	dm_free(full);
+
+	return NULL;
+}
+
+static char *_construct_lvm_system_dir_env(const char *sysdir)
+{
+	/*
+	 * Result is "LVM_SYSTEM_DIR=<sysdir>" when sysdir is given, otherwise
+	 * an empty string; the 16 covers the 15-char prefix and the NUL byte.
+	 */
+	const size_t size = sysdir ? strlen(sysdir) + 16 : 1;
+	char *buf = (char *) dm_malloc(size * sizeof(char));
+
+	if (!buf)
+		return NULL;
+
+	buf[0] = '\0';
+	if (!sysdir)
+		return buf; /* no system dir: hand back the empty string */
+
+	if (dm_snprintf(buf, size, "LVM_SYSTEM_DIR=%s", sysdir) >= 0)
+		return buf;
+
+	dm_free(buf);
+	return NULL;
+}
+
+static const char *_get_lvid(const char *lvmpolld_id, const char *sysdir)
+{
+	return lvmpolld_id ? (lvmpolld_id + (sysdir ? strlen(sysdir) : 0)) : NULL;
+}
+
+char *construct_id(const char *sysdir, const char *uuid)
+{
+	/* a missing sysdir contributes nothing to the id */
+	const char *prefix = sysdir ? sysdir : "";
+	const size_t size = strlen(prefix) + strlen(uuid) + 1; /* + NUL */
+	char *id = (char *) dm_malloc(size * sizeof(char));
+
+	if (!id)
+		return NULL;
+
+	/*
+	 * The id is sysdir immediately followed by uuid (no separator);
+	 * a negative dm_snprintf() result is treated as failure.
+	 */
+	if (dm_snprintf(id, size, "%s%s", prefix, uuid) < 0) {
+		dm_free(id);
+		return NULL;
+	}
+
+	return id;
+}
+
+struct lvmpolld_lv *pdlv_create(struct lvmpolld_state *ls, const char *id,
+			   const char *vgname, const char *lvname,
+			   const char *sysdir, enum poll_type type,
+			   const char *sinterval, unsigned pdtimeout,
+			   struct lvmpolld_store *pdst)
+{
+	char *lvmpolld_id = dm_strdup(id), /* private copy of the caller's id */
+	     *full_lvname = _construct_full_lvname(vgname, lvname), /* newly allocated "vg/lv" string */
+	     *lvm_system_dir_env = _construct_lvm_system_dir_env(sysdir); /* newly allocated; "" when sysdir is NULL */
+
+	struct lvmpolld_lv tmp = {
+		.ls = ls,
+		.type = type,
+		.lvmpolld_id = lvmpolld_id,
+		.lvid = _get_lvid(lvmpolld_id, sysdir), /* points into lvmpolld_id; NULL if that copy failed */
+		.lvname = full_lvname,
+		.lvm_system_dir_env = lvm_system_dir_env,
+		.sinterval = dm_strdup(sinterval), /* private copy of the interval string */
+		.pdtimeout = pdtimeout < MIN_POLLING_TIMEOUT ? MIN_POLLING_TIMEOUT : pdtimeout, /* never below the minimum */
+		.cmd_state = { .retcode = -1, .signal = 0 }, /* the pair _pdlv_locked_dump() reports as in progress */
+		.pdst = pdst,
+		.init_rq_count = 1
+	}, *pdlv = (struct lvmpolld_lv *) dm_malloc(sizeof(struct lvmpolld_lv));
+
+	if (!pdlv || !tmp.lvid || !tmp.lvname || !tmp.lvm_system_dir_env || !tmp.sinterval) /* any allocation failed */
+		goto err;
+
+	memcpy(pdlv, &tmp, sizeof(*pdlv)); /* whole-struct copy; struct lvmpolld_lv has const members */
+
+	if (pthread_mutex_init(&pdlv->lock, NULL))
+		goto err;
+
+	return pdlv;
+
+err: /* release everything allocated above; lvid is not freed, it aliases lvmpolld_id */
+	dm_free((void *)full_lvname);
+	dm_free((void *)lvmpolld_id);
+	dm_free((void *)lvm_system_dir_env);
+	dm_free((void *)tmp.sinterval);
+	dm_free((void *)pdlv);
+
+	return NULL;
+}
+
+void pdlv_destroy(struct lvmpolld_lv *pdlv)
+{
+	dm_free((void *)pdlv->lvmpolld_id); /* pdlv->lvid points into this buffer and is not freed on its own */
+	dm_free((void *)pdlv->lvname);
+	dm_free((void *)pdlv->sinterval);
+	dm_free((void *)pdlv->lvm_system_dir_env);
+	dm_free((void *)pdlv->cmdargv); /* only the two vectors are released here, not the strings they reference */
+	dm_free((void *)pdlv->cmdenvp);
+
+	pthread_mutex_destroy(&pdlv->lock);
+
+	dm_free((void *)pdlv); /* the object itself goes last */
+}
+
+unsigned pdlv_get_polling_finished(struct lvmpolld_lv *pdlv)
+{
+	unsigned finished;
+
+	/* polling_finished is shared state: sample it under the pdlv lock */
+	pthread_mutex_lock(&pdlv->lock);
+	finished = pdlv->polling_finished;
+	pthread_mutex_unlock(&pdlv->lock);
+	return finished;
+}
+
+struct lvmpolld_lv_state pdlv_get_status(struct lvmpolld_lv *pdlv)
+{
+	struct lvmpolld_lv_state snapshot;
+
+	/* read all three fields in one critical section for a consistent view */
+	pdlv_lock(pdlv);
+	snapshot.cmd_state = pdlv->cmd_state;
+	snapshot.polling_finished = pdlv->polling_finished;
+	snapshot.error = pdlv->error;
+	pdlv_unlock(pdlv);
+	return snapshot;
+}
+
+void pdlv_set_cmd_state(struct lvmpolld_lv *pdlv, const struct lvmpolld_cmd_stat *cmd_state)
+{
+	pdlv_lock(pdlv);
+	pdlv->cmd_state = *cmd_state;
+	pdlv_unlock(pdlv);
+}
+
+void pdlv_set_error(struct lvmpolld_lv *pdlv, unsigned error)
+{
+	pdlv_lock(pdlv);
+	pdlv->error = error;
+	pdlv_unlock(pdlv);
+}
+
+void pdlv_set_polling_finished(struct lvmpolld_lv *pdlv, unsigned finished)
+{
+	pdlv_lock(pdlv);
+	pdlv->polling_finished = finished;
+	pdlv_unlock(pdlv);
+}
+
+struct lvmpolld_store *pdst_init(const char *name)
+{
+	struct lvmpolld_store *st = (struct lvmpolld_store *) dm_malloc(sizeof(*st));
+
+	if (!st)
+		return NULL;
+
+	/* the hash table comes first; the mutex is set up only once it exists */
+	if (!(st->store = dm_hash_create(32))) {
+		dm_free(st);
+		return NULL;
+	}
+
+	if (pthread_mutex_init(&st->lock, NULL)) {
+		dm_hash_destroy(st->store);
+		dm_free(st);
+		return NULL;
+	}
+
+	st->name = name; /* pointer is stored as-is, the string is not copied */
+	st->active_polling_count = 0;
+	return st;
+}
+
+void pdst_destroy(struct lvmpolld_store *pdst)
+{
+	/* destroying a NULL store is a no-op */
+	if (pdst) {
+		dm_hash_destroy(pdst->store);
+		pthread_mutex_destroy(&pdst->lock);
+		dm_free(pdst);
+	}
+}
+
+void pdst_locked_lock_all_pdlvs(const struct lvmpolld_store *pdst)
+{
+	struct dm_hash_node *n;
+
+	dm_hash_iterate(n, pdst->store)
+		pdlv_lock(dm_hash_get_data(pdst->store, n));
+}
+
+void pdst_locked_unlock_all_pdlvs(const struct lvmpolld_store *pdst)
+{
+	struct dm_hash_node *n;
+
+	dm_hash_iterate(n, pdst->store)
+		pdlv_unlock(dm_hash_get_data(pdst->store, n));
+}
+
+static void _pdlv_locked_dump(struct buffer *buff, const struct lvmpolld_lv *pdlv)
+{
+	char tmp[1024]; /* scratch for one formatted line; a line dm_snprintf() cannot produce is silently omitted */
+	const struct lvmpolld_cmd_stat *cmd_state = &pdlv->cmd_state;
+
+	/* pdlv-section { (one config-style section per LV, named after its lvmpolld id) */
+	if (dm_snprintf(tmp, sizeof(tmp), "\t%s {\n", pdlv->lvmpolld_id) > 0)
+		buffer_append(buff, tmp);
+
+	if (dm_snprintf(tmp, sizeof(tmp), "\t\tlvid=\"%s\"\n", pdlv->lvid) > 0)
+		buffer_append(buff, tmp);
+	if (dm_snprintf(tmp, sizeof(tmp), "\t\ttype=\"%s\"\n", polling_op(pdlv->type)) > 0)
+		buffer_append(buff, tmp);
+	if (dm_snprintf(tmp, sizeof(tmp), "\t\tlvname=\"%s\"\n", pdlv->lvname) > 0)
+		buffer_append(buff, tmp);
+	if (dm_snprintf(tmp, sizeof(tmp), "\t\tlvmpolld_internal_timeout=%u\n", pdlv->pdtimeout) > 0) /* unsigned field: %u */
+		buffer_append(buff, tmp);
+	if (dm_snprintf(tmp, sizeof(tmp), "\t\tlvm_command_interval=\"%s\"\n", pdlv->sinterval ?: "<undefined>") > 0)
+		buffer_append(buff, tmp);
+	if (dm_snprintf(tmp, sizeof(tmp), "\t\tLVM_SYSTEM_DIR=\"%s\"\n",
+			(*pdlv->lvm_system_dir_env ? (pdlv->lvm_system_dir_env + strlen("LVM_SYSTEM_DIR=")) : "<undefined>")) > 0)
+		buffer_append(buff, tmp);
+	if (dm_snprintf(tmp, sizeof(tmp), "\t\tlvm_command_pid=%d\n", pdlv->cmd_pid) > 0)
+		buffer_append(buff, tmp);
+	if (dm_snprintf(tmp, sizeof(tmp), "\t\tpolling_finished=%d\n", pdlv->polling_finished) > 0) /* 1-bit field promotes to int */
+		buffer_append(buff, tmp);
+	if (dm_snprintf(tmp, sizeof(tmp), "\t\terror_occured=%d\n", pdlv->error) > 0) /* key spelling is part of the output */
+		buffer_append(buff, tmp);
+	if (dm_snprintf(tmp, sizeof(tmp), "\t\tinit_requests_count=%u\n", pdlv->init_rq_count) > 0) /* unsigned field: %u */
+		buffer_append(buff, tmp);
+
+	/* lvm_command-section { */
+	buffer_append(buff, "\t\tlvm_command {\n");
+	if (cmd_state->retcode == -1 && !cmd_state->signal) /* still the initial pair set by pdlv_create() */
+		buffer_append(buff, "\t\t\tstate=\"" LVMPD_RESP_IN_PROGRESS "\"\n");
+	else {
+		buffer_append(buff, "\t\t\tstate=\"" LVMPD_RESP_FINISHED "\"\n");
+		if (dm_snprintf(tmp, sizeof(tmp), "\t\t\treason=\"%s\"\n\t\t\tvalue=%d\n",
+				(cmd_state->signal ? LVMPD_REAS_SIGNAL : LVMPD_REAS_RETCODE),
+				(cmd_state->signal ?: cmd_state->retcode)) > 0) /* a non-zero signal takes precedence over retcode */
+			buffer_append(buff, tmp);
+	}
+	buffer_append(buff, "\t\t}\n");
+	/* } lvm_command-section */
+
+	buffer_append(buff, "\t}\n");
+	/* } pdlv-section */
+}
+
+void pdst_locked_dump(const struct lvmpolld_store *pdst, struct buffer *buff)
+{
+	struct dm_hash_node *n;
+
+	dm_hash_iterate(n, pdst->store)
+		_pdlv_locked_dump(buff, dm_hash_get_data(pdst->store, n));
+}
+
+void pdst_locked_send_cancel(const struct lvmpolld_store *pdst)
+{
+	struct dm_hash_node *node;
+
+	/* request cancellation of every thread whose polling is still running */
+	dm_hash_iterate(node, pdst->store) {
+		struct lvmpolld_lv *lv = dm_hash_get_data(pdst->store, node);
+		if (!lv->polling_finished)
+			pthread_cancel(lv->tid);
+	}
+}
+
+void pdst_locked_destroy_all_pdlvs(const struct lvmpolld_store *pdst)
+{
+	struct dm_hash_node *n;
+
+	dm_hash_iterate(n, pdst->store)
+		pdlv_destroy(dm_hash_get_data(pdst->store, n));
+}
+
+struct lvmpolld_thread_data *lvmpolld_thread_data_constructor(struct lvmpolld_lv *pdlv)
+{
+	struct lvmpolld_thread_data *td = (struct lvmpolld_thread_data *) dm_malloc(sizeof(*td));
+
+	if (!td)
+		return NULL;
+
+	/*
+	 * Put the fields lvmpolld_thread_data_destroy() inspects into their
+	 * "unset" state first, so it is safe on a partly built object;
+	 * pdlv stays NULL until construction has succeeded.
+	 */
+	td->pdlv = NULL;
+	td->line = NULL;
+	td->line_size = 0;
+	td->fout = td->ferr = NULL;
+	td->outpipe[0] = td->outpipe[1] = td->errpipe[0] = td->errpipe[1] = -1;
+
+	/* create both pipes, then mark all four ends close-on-exec */
+	if (pipe(td->outpipe) || pipe(td->errpipe) ||
+	    fcntl(td->outpipe[0], F_SETFD, FD_CLOEXEC) || fcntl(td->outpipe[1], F_SETFD, FD_CLOEXEC) ||
+	    fcntl(td->errpipe[0], F_SETFD, FD_CLOEXEC) || fcntl(td->errpipe[1], F_SETFD, FD_CLOEXEC)) {
+		lvmpolld_thread_data_destroy(td);
+		return NULL;
+	}
+
+	td->pdlv = pdlv;
+	return td;
+}
+
+void lvmpolld_thread_data_destroy(void *thread_private)
+{
+	struct lvmpolld_thread_data *data = (struct lvmpolld_thread_data *) thread_private;
+	if (!data) /* nothing to release */
+		return;
+
+	if (data->pdlv) { /* set only once the constructor succeeded */
+		pdst_lock(data->pdlv->pdst);
+		/*
+		 * FIXME: skip this step if lvmpolld is activated by systemd.
+		 * Never pass pid <= 0 to kill(): 0 / -1 address our own process group / all processes.
+		 */
+		if (data->pdlv->cmd_pid > 0 && !pdlv_get_polling_finished(data->pdlv))
+			kill(data->pdlv->cmd_pid, SIGTERM);
+		pdlv_set_polling_finished(data->pdlv, 1);
+		pdst_locked_dec(data->pdlv->pdst); /* one active polling less in this store */
+		pdst_unlock(data->pdlv->pdst);
+	}
+
+	/* may get reallocated in getline(). dm_free must not be used */
+	free(data->line);
+
+	if (data->fout && !fclose(data->fout))
+		data->outpipe[0] = -1; /* fout presumably wraps outpipe[0]: don't close() it again below */
+
+	if (data->ferr && !fclose(data->ferr))
+		data->errpipe[0] = -1; /* likewise for ferr and errpipe[0] */
+
+	if (data->outpipe[0] >= 0) /* -1 marks a descriptor that is unset or already closed */
+		(void) close(data->outpipe[0]);
+
+	if (data->outpipe[1] >= 0)
+		(void) close(data->outpipe[1]);
+
+	if (data->errpipe[0] >= 0)
+		(void) close(data->errpipe[0]);
+
+	if (data->errpipe[1] >= 0)
+		(void) close(data->errpipe[1]);
+
+	dm_free(data);
+}
--- a/daemons/lvmpolld/lvmpolld-data-utils.h
+++ b/daemons/lvmpolld/lvmpolld-data-utils.h
@@ -0,0 +1,215 @@
+/*
+ * Copyright (C) 2014-2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _LVM_LVMPOLLD_DATA_UTILS_H
+#define _LVM_LVMPOLLD_DATA_UTILS_H
+
+#include <pthread.h>
+
+struct buffer;
+struct lvmpolld_state;
+
+enum poll_type {
+	PVMOVE = 0,
+	CONVERT,
+	MERGE,
+	MERGE_THIN,
+	POLL_TYPE_MAX
+};
+
+struct lvmpolld_cmd_stat {
+	int retcode;
+	int signal;
+};
+
+struct lvmpolld_store {
+	pthread_mutex_t lock;
+	void *store;
+	const char *name;
+	unsigned active_polling_count;
+};
+
+struct lvmpolld_lv {
+	/*
+	 * accessing following vars doesn't
+	 * require struct lvmpolld_lv lock
+	 */
+	struct lvmpolld_state *const ls;
+	const enum poll_type type;
+	const char *const lvid; /* points into lvmpolld_id, past the sysdir prefix */
+	const char *const lvmpolld_id;
+	const char *const lvname; /* full vg/lv name */
+	const unsigned pdtimeout; /* in seconds */
+	const char *const sinterval;
+	const char *const lvm_system_dir_env; /* "LVM_SYSTEM_DIR=<dir>", or "" when no sysdir was given */
+	struct lvmpolld_store *const pdst;
+	const char *const *cmdargv;
+	const char *const *cmdenvp;
+
+	/* only used by write */
+	pid_t cmd_pid;
+	pthread_t tid;
+
+	pthread_mutex_t lock;
+
+	/* block of shared variables protected by lock */
+	struct lvmpolld_cmd_stat cmd_state;
+	unsigned init_rq_count; /* for debugging purposes only */
+	unsigned polling_finished:1; /* no more updates */
+	unsigned error:1; /* unrecoverable error occurred in lvmpolld */
+};
+
+typedef void (*lvmpolld_parse_output_fn_t) (struct lvmpolld_lv *pdlv, const char *line);
+
+/* TODO: replace with configuration option */
+#define MIN_POLLING_TIMEOUT 60
+
+struct lvmpolld_lv_state {
+	unsigned error:1;
+	unsigned polling_finished:1;
+	struct lvmpolld_cmd_stat cmd_state;
+};
+
+struct lvmpolld_thread_data {
+	char *line;
+	size_t line_size;
+	int outpipe[2];
+	int errpipe[2];
+	FILE *fout;
+	FILE *ferr;
+	char buf[1024];
+	struct lvmpolld_lv *pdlv;
+};
+
+char *construct_id(const char *sysdir, const char *lvid);
+
+/* LVMPOLLD_LV_T section */
+
+/* only call with appropriate struct lvmpolld_store lock held */
+struct lvmpolld_lv *pdlv_create(struct lvmpolld_state *ls, const char *id,
+			   const char *vgname, const char *lvname,
+			   const char *sysdir, enum poll_type type,
+			   const char *sinterval, unsigned pdtimeout,
+			   struct lvmpolld_store *pdst);
+
+/* only call with appropriate struct lvmpolld_store lock held */
+void pdlv_destroy(struct lvmpolld_lv *pdlv);
+
+static inline void pdlv_lock(struct lvmpolld_lv *pdlv)
+{
+	pthread_mutex_lock(&pdlv->lock);
+}
+
+static inline void pdlv_unlock(struct lvmpolld_lv *pdlv)
+{
+	pthread_mutex_unlock(&pdlv->lock);
+}
+
+/*
+ * no struct lvmpolld_lv lock required section
+ */
+static inline int pdlv_is_type(const struct lvmpolld_lv *pdlv, enum poll_type type)
+{
+	return pdlv->type == type;
+}
+
+static inline unsigned pdlv_get_timeout(const struct lvmpolld_lv *pdlv)
+{
+	return pdlv->pdtimeout;
+}
+
+static inline enum poll_type pdlv_get_type(const struct lvmpolld_lv *pdlv)
+{
+	return pdlv->type;
+}
+
+unsigned pdlv_get_polling_finished(struct lvmpolld_lv *pdlv);
+struct lvmpolld_lv_state pdlv_get_status(struct lvmpolld_lv *pdlv);
+void pdlv_set_cmd_state(struct lvmpolld_lv *pdlv, const struct lvmpolld_cmd_stat *cmd_state);
+void pdlv_set_error(struct lvmpolld_lv *pdlv, unsigned error);
+void pdlv_set_polling_finished(struct lvmpolld_lv *pdlv, unsigned finished);
+
+/*
+ * struct lvmpolld_lv lock required section
+ */
+static inline struct lvmpolld_cmd_stat pdlv_locked_cmd_state(const struct lvmpolld_lv *pdlv)
+{
+	return pdlv->cmd_state;
+}
+
+static inline int pdlv_locked_polling_finished(const struct lvmpolld_lv *pdlv)
+{
+	return pdlv->polling_finished;
+}
+
+static inline unsigned pdlv_locked_error(const struct lvmpolld_lv *pdlv)
+{
+	return pdlv->error;
+}
+
+/* struct lvmpolld_store manipulation routines */
+
+struct lvmpolld_store *pdst_init(const char *name);
+void pdst_destroy(struct lvmpolld_store *pdst);
+
+void pdst_locked_dump(const struct lvmpolld_store *pdst, struct buffer *buff);
+void pdst_locked_lock_all_pdlvs(const struct lvmpolld_store *pdst);
+void pdst_locked_unlock_all_pdlvs(const struct lvmpolld_store *pdst);
+void pdst_locked_destroy_all_pdlvs(const struct lvmpolld_store *pdst);
+void pdst_locked_send_cancel(const struct lvmpolld_store *pdst);
+
+static inline void pdst_lock(struct lvmpolld_store *pdst)
+{
+	pthread_mutex_lock(&pdst->lock);
+}
+
+static inline void pdst_unlock(struct lvmpolld_store *pdst)
+{
+	pthread_mutex_unlock(&pdst->lock);
+}
+
+static inline void pdst_locked_inc(struct lvmpolld_store *pdst)
+{
+	pdst->active_polling_count++;
+}
+
+static inline void pdst_locked_dec(struct lvmpolld_store *pdst)
+{
+	pdst->active_polling_count--;
+}
+
+static inline unsigned pdst_locked_get_active_count(const struct lvmpolld_store *pdst)
+{
+	return pdst->active_polling_count;
+}
+
+static inline int pdst_locked_insert(struct lvmpolld_store *pdst, const char *key, struct lvmpolld_lv *pdlv)
+{
+	return dm_hash_insert(pdst->store, key, pdlv);
+}
+
+static inline struct lvmpolld_lv *pdst_locked_lookup(struct lvmpolld_store *pdst, const char *key)
+{
+	return dm_hash_lookup(pdst->store, key);
+}
+
+static inline void pdst_locked_remove(struct lvmpolld_store *pdst, const char *key)
+{
+	dm_hash_remove(pdst->store, key);
+}
+
+struct lvmpolld_thread_data *lvmpolld_thread_data_constructor(struct lvmpolld_lv *pdlv);
+void lvmpolld_thread_data_destroy(void *thread_private);
+
+#endif /* _LVM_LVMPOLLD_DATA_UTILS_H */
--- a/daemons/lvmpolld/lvmpolld-protocol.h
+++ b/daemons/lvmpolld/lvmpolld-protocol.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _LVM_LVMPOLLD_PROTOCOL_H
+#define _LVM_LVMPOLLD_PROTOCOL_H
+
+#include "polling_ops.h"
+
+#define LVMPOLLD_PROTOCOL "lvmpolld"
+#define LVMPOLLD_PROTOCOL_VERSION 1
+
+#define LVMPD_REQ_CONVERT	CONVERT_POLL
+#define LVMPD_REQ_DUMP		"dump"
+#define LVMPD_REQ_MERGE		MERGE_POLL
+#define LVMPD_REQ_MERGE_THIN	MERGE_THIN_POLL
+#define LVMPD_REQ_PROGRESS	"progress_info"
+#define LVMPD_REQ_PVMOVE	PVMOVE_POLL
+
+#define LVMPD_PARM_ABORT		"abort"
+#define LVMPD_PARM_HANDLE_MISSING_PVS	"handle_missing_pvs"
+#define LVMPD_PARM_INTERVAL		"interval"
+#define LVMPD_PARM_LVID			"lvid"
+#define LVMPD_PARM_LVNAME		"lvname"
+#define LVMPD_PARM_SYSDIR		"sysdir"
+#define LVMPD_PARM_VALUE		"value" /* either retcode or signal value */
+#define LVMPD_PARM_VGNAME		"vgname"
+
+#define LVMPD_RESP_FAILED	"failed"
+#define LVMPD_RESP_FINISHED	"finished"
+#define LVMPD_RESP_IN_PROGRESS	"in_progress"
+#define LVMPD_RESP_EINVAL	"invalid"
+#define LVMPD_RESP_NOT_FOUND	"not_found"
+#define LVMPD_RESP_OK		"OK"
+
+#define LVMPD_REAS_RETCODE	"retcode" /* lvm cmd ret code */
+#define LVMPD_REAS_SIGNAL	"signal" /* lvm cmd terminating signal */
+
+#define LVMPD_RET_DUP_FAILED	100
+#define LVMPD_RET_EXC_FAILED	101
+
+#endif /* _LVM_LVMPOLLD_PROTOCOL_H */
--- a/daemons/lvmpolld/polling_ops.h
+++ b/daemons/lvmpolld/polling_ops.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2014-2015 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _LVM_TOOL_POLLING_OPS_H
+#define _LVM_TOOL_POLLING_OPS_H
+
+/* this file is also part of lvmpolld protocol */
+
+#define PVMOVE_POLL "pvmove"
+#define CONVERT_POLL "convert"
+#define MERGE_POLL "merge"
+#define MERGE_THIN_POLL "merge_thin"
+
+#endif /* _LVM_TOOL_POLLING_OPS_H */
--- a/doc/caching_foreign_vgs.txt
+++ b/doc/caching_foreign_vgs.txt
@@ -0,0 +1,86 @@
+Q: Why should lvmetad cache foreign VGs?
+A: It's the most useful behavior in the "steady state".
+
+To arrive at that conclusion, consider four code configurations,
+each in two different circumstances.
+
+configurations:
+
+1. lvm not using lvmetad
+2. lvm using lvmetad and lvmlockd
+3. lvm using lvmetad, and lvmetad does not cache foreign VGs
+   (Not currently implemented.)
+4. lvm using lvmetad, and lvmetad caches foreign VGs
+
+circumstances:
+
+A. steady state: PVs are not added or removed to/from foreign VGs
+B. transient state: PVs are added or removed to/from foreign VGs
+
+combinations:
+
+1.A. A PV is correctly shown in the foreign VG.
+1.B. A PV is correctly shown in the foreign VG.
+
+The most accurate representation, at the cost of always scanning disks.
+
+
+2.A. A PV is correctly shown in the foreign VG.
+2.B. A PV is correctly shown in the foreign VG.
+
+The most accurate representation, at the cost of using lvmlockd.
+
+
+3.A. A PV in a foreign VG is shown as unused.
+3.B. A PV in a foreign VG is shown as unused.
+
+If lvmetad ignores foreign VGs and does not cache them, the PVs in the
+foreign VGs appear to be unused.  This largely defeats the purpose of
+system_id, which is meant to treat VGs/PVs as foreign instead of free
+(albeit imperfectly, see below.)
+
+
+4.A. A PV is correctly shown in the foreign VG.
+4.B. A PV is not correctly shown in the foreign VG.
+
+This avoids the cost of always scanning disks, and avoids the cost of
+using lvmlockd.  The steady state 4.A. is an improvement over the steady
+state 3.A.  When the steady state is the common case, this is a big
+advantage.  When the steady state is *not* the common case, the foreign VG
+concept is not as useful (if shared devices are this dynamic, lvmlockd
+should be considered.)
+
+The limitations related to the transient state 4.B. are explained in
+lvmsystemid(7), along with how to handle it.  The specific inaccuracies
+possible in 4.B. are:
+
+. PV is shown as belonging to a foreign VG, but is actually unused.
+. PV is shown as unused, but actually belongs to a foreign VG.
+
+To resolve the inaccuracies in the transient state (4.B.), and return the
+system to an accurate steady state (4.A.), the disks need to be scanned,
+which updates lvmetad.  The scanning/updating is a manual step, i.e.
+running 'pvscan --cache', which by definition scans disks and updates
+lvmetad.
+
+--
+
+The --foreign command line option for report/display commands
+(vgs/lvs/pvs/vgdisplay/lvdisplay/pvdisplay) is not directly related to
+whether or not lvmetad caches foreign VGs.
+
+By default, foreign VGs are silently ignored and not printed by these
+commands.  However, when the --foreign option is used, these commands do
+produce output about foreign VGs.
+
+(When --foreign is not used, and the command specifically requests a
+foreign VG by name, an error is produced about not accessing foreign VGs,
+and the foreign VG is not displayed.)
+
+The decision to report/display foreign VGs or not is independent of
+whether lvmetad is caching those VGs.  When lvmetad is caching the foreign
+VG, a report/display command run with --foreign will scan disks to read
+the foreign VG and give the most up to date version of it (the copy of the
+foreign VG in lvmetad may be out of date due to changes to the VG by the
+foreign host.)
+
--- a/doc/kernel/cache-policies.txt
+++ b/doc/kernel/cache-policies.txt
@@ -30,28 +30,48 @@ multiqueue

 This policy is the default.

-The multiqueue policy has two sets of 16 queues: one set for entries
-waiting for the cache and another one for those in the cache.
+The multiqueue policy has three sets of 16 queues: one set for entries
+waiting for the cache and another two for those in the cache (a set for
+clean entries and a set for dirty entries).
+
 Cache entries in the queues are aged based on logical time. Entry into
 the cache is based on variable thresholds and queue selection is based
 on hit count on entry. The policy aims to take different cache miss
 costs into account and to adjust to varying load patterns automatically.

 Message and constructor argument pairs are:
-	'sequential_threshold <#nr_sequential_ios>' and
-	'random_threshold <#nr_random_ios>'.
+	'sequential_threshold <#nr_sequential_ios>'
+	'random_threshold <#nr_random_ios>'
+	'read_promote_adjustment <value>'
+	'write_promote_adjustment <value>'
+	'discard_promote_adjustment <value>'

 The sequential threshold indicates the number of contiguous I/Os
-required before a stream is treated as sequential.  The random threshold
+required before a stream is treated as sequential.  Once a stream is
+considered sequential it will bypass the cache.  The random threshold
 is the number of intervening non-contiguous I/Os that must be seen
 before the stream is treated as random again.

 The sequential and random thresholds default to 512 and 4 respectively.

-Large, sequential ios are probably better left on the origin device
-since spindles tend to have good bandwidth. The io_tracker counts
-contiguous I/Os to try to spot when the io is in one of these sequential
-modes.
+Large, sequential I/Os are probably better left on the origin device
+since spindles tend to have good sequential I/O bandwidth.  The
+io_tracker counts contiguous I/Os to try to spot when the I/O is in one
+of these sequential modes.  But there are use-cases for wanting to
+promote sequential blocks to the cache (e.g. fast application startup).
+If the sequential threshold is set to 0, sequential I/O detection is
+disabled and sequential I/O will no longer implicitly bypass the cache.
+Setting the random threshold to 0 does _not_ disable the random I/O
+stream detection.
+
+Internally the mq policy determines a promotion threshold.  If the hit
+count of a block not in the cache goes above this threshold it gets
+promoted to the cache.  The read, write and discard promote adjustment
+tunables allow you to tweak the promotion threshold by adding a small
+value based on the I/O type.  They default to 4, 8 and 1 respectively.
+If you're trying to quickly warm a new cache device you may wish to
+reduce these to encourage promotion.  Remember to switch them back to
+their defaults after the cache fills though.

 cleaner
 -------
--- a/doc/kernel/cache.txt
+++ b/doc/kernel/cache.txt
@@ -50,14 +50,16 @@ other parameters detailed later):
   which are dirty, and extra hints for use by the policy object.
   This information could be put on the cache device, but having it
   separate allows the volume manager to configure it differently,
-   e.g. as a mirror for extra robustness.
+   e.g. as a mirror for extra robustness.  This metadata device may only
+   be used by a single cache device.

 Fixed block size
 ----------------

 The origin is divided up into blocks of a fixed size.  This block size
 is configurable when you first create the cache.  Typically we've been
-using block sizes of 256k - 1024k.
+using block sizes of 256KB - 1024KB.  The block size must be between 64
+(32KB) and 2097152 (1GB) and a multiple of 64 (32KB).

 Having a fixed block size simplifies the target a lot.  But it is
 something of a compromise.  For instance, a small part of a block may be
@@ -66,10 +68,11 @@ So large block sizes are bad because they waste cache space.  And small
 block sizes are bad because they increase the amount of metadata (both
 in core and on disk).

-Writeback/writethrough
----------------------
+Cache operating modes
+---------------------

-The cache has two modes, writeback and writethrough.
+The cache has three operating modes: writeback, writethrough and
+passthrough.

 If writeback, the default, is selected then a write to a block that is
 cached will go only to the cache and the block will be marked dirty in
@@ -79,15 +82,38 @@ If writethrough is selected then a write to a cached block will not
 complete until it has hit both the origin and cache devices.  Clean
 blocks should remain clean.

+If passthrough is selected, useful when the cache contents are not known
+to be coherent with the origin device, then all reads are served from
+the origin device (all reads miss the cache) and all writes are
+forwarded to the origin device; additionally, write hits cause cache
+block invalidates.  To enable passthrough mode the cache must be clean.
+Passthrough mode allows a cache device to be activated without having to
+worry about coherency.  Coherency that exists is maintained, although
+the cache will gradually cool as writes take place.  If the coherency of
+the cache can later be verified, or established through use of the
+"invalidate_cblocks" message, the cache device can be transitioned to
+writethrough or writeback mode while still warm.  Otherwise, the cache
+contents can be discarded prior to transitioning to the desired
+operating mode.
+
 A simple cleaner policy is provided, which will clean (write back) all
-dirty blocks in a cache.  Useful for decommissioning a cache.
+dirty blocks in a cache.  Useful for decommissioning a cache or when
+shrinking a cache.  Shrinking the cache's fast device requires all cache
+blocks, in the area of the cache being removed, to be clean.  If the
+area being removed from the cache still contains dirty blocks the resize
+will fail.  Care must be taken to never reduce the volume used for the
+cache's fast device until the cache is clean.  This is of particular
+importance if writeback mode is used.  Writethrough and passthrough
+modes already maintain a clean cache.  Future support to partially clean
+the cache, above a specified threshold, will allow for keeping the cache
+warm and in writeback mode during resize.

 Migration throttling
 --------------------

 Migrating data between the origin and cache device uses bandwidth.
 The user can set a throttle to prevent more than a certain amount of
-migration occuring at any one time.  Currently we're not taking any
+migration occurring at any one time.  Currently we're not taking any
 account of normal io traffic going to the devices.  More work needs
 doing here to avoid migrating during those peak io moments.

@@ -98,12 +124,11 @@ the default being 204800 sectors (or 100MB).
 Updating on-disk metadata
 -------------------------

-On-disk metadata is committed every time a REQ_SYNC or REQ_FUA bio is
-written.  If no such requests are made then commits will occur every
-second.  This means the cache behaves like a physical disk that has a
-write cache (the same is true of the thin-provisioning target).  If
-power is lost you may lose some recent writes.  The metadata should
-always be consistent in spite of any crash.
+On-disk metadata is committed every time a FLUSH or FUA bio is written.
+If no such requests are made then commits will occur every second.  This
+means the cache behaves like a physical disk that has a volatile write
+cache.  If power is lost you may lose some recent writes.  The metadata
+should always be consistent in spite of any crash.

 The 'dirty' state for a cache block changes far too frequently for us
 to keep updating it on the fly.  So we treat it as a hint.  In normal
@@ -159,7 +184,7 @@ Constructor
 block size      : cache unit size in sectors

 #feature args   : number of feature arguments passed
- feature args    : writethrough.  (The default is writeback.)
+ feature args    : writethrough or passthrough (The default is writeback.)

 policy          : the replacement policy to use
 #policy args    : an even number of arguments corresponding to
@@ -175,6 +200,13 @@ Optional feature arguments are:
 		   back cache block contents later for performance reasons,
 		   so they may differ from the corresponding origin blocks.

+   passthrough	 : a degraded mode useful for various cache coherency
+		   situations (e.g., rolling back snapshots of
+		   underlying storage).  Reads and writes always go to
+		   the origin.  If a write goes to a cached origin
+		   block, then the cache block is invalidated.
+		   To enable passthrough mode the cache must be clean.
+
 A policy called 'default' is always registered.  This is an alias for
 the policy we currently think is giving best all round performance.

@@ -184,36 +216,43 @@ the characteristics of a specific policy, always request it by name.
 Status
 ------

-<#used metadata blocks>/<#total metadata blocks> <#read hits> <#read misses>
-<#write hits> <#write misses> <#demotions> <#promotions> <#blocks in cache>
-<#dirty> <#features> <features>* <#core args> <core args>* <#policy args>
-<policy args>*
+<metadata block size> <#used metadata blocks>/<#total metadata blocks>
+<cache block size> <#used cache blocks>/<#total cache blocks>
+<#read hits> <#read misses> <#write hits> <#write misses>
+<#demotions> <#promotions> <#dirty> <#features> <features>*
+<#core args> <core args>* <policy name> <#policy args> <policy args>*

-#used metadata blocks    : Number of metadata blocks used
-#total metadata blocks   : Total number of metadata blocks
-#read hits               : Number of times a READ bio has been mapped
+metadata block size	 : Fixed block size for each metadata block in
+			     sectors
+#used metadata blocks	 : Number of metadata blocks used
+#total metadata blocks	 : Total number of metadata blocks
+cache block size	 : Configurable block size for the cache device
+			     in sectors
+#used cache blocks	 : Number of blocks resident in the cache
+#total cache blocks	 : Total number of cache blocks
+#read hits		 : Number of times a READ bio has been mapped
 			     to the cache
-#read misses             : Number of times a READ bio has been mapped
+#read misses		 : Number of times a READ bio has been mapped
 			     to the origin
-#write hits              : Number of times a WRITE bio has been mapped
+#write hits		 : Number of times a WRITE bio has been mapped
 			     to the cache
-#write misses            : Number of times a WRITE bio has been
+#write misses		 : Number of times a WRITE bio has been
 			     mapped to the origin
-#demotions               : Number of times a block has been removed
+#demotions		 : Number of times a block has been removed
 			     from the cache
-#promotions              : Number of times a block has been moved to
+#promotions		 : Number of times a block has been moved to
 			     the cache
-#blocks in cache         : Number of blocks resident in the cache
-#dirty                   : Number of blocks in the cache that differ
+#dirty			 : Number of blocks in the cache that differ
 			     from the origin
-#feature args            : Number of feature args to follow
-feature args             : 'writethrough' (optional)
-#core args               : Number of core arguments (must be even)
-core args                : Key/value pairs for tuning the core
+#feature args		 : Number of feature args to follow
+feature args		 : 'writethrough' (optional)
+#core args		 : Number of core arguments (must be even)
+core args		 : Key/value pairs for tuning the core
 			     e.g. migration_threshold
-#policy args             : Number of policy arguments to follow (must be even)
-policy args              : Key/value pairs
-			     e.g. 'sequential_threshold 1024
+policy name		 : Name of the policy
+#policy args		 : Number of policy arguments to follow (must be even)
+policy args		 : Key/value pairs
+			     e.g. sequential_threshold

 Messages
 --------
@@ -229,12 +268,28 @@ The message format is:
 E.g.
   dmsetup message my_cache 0 sequential_threshold 1024

+
+Invalidation is removing an entry from the cache without writing it
+back.  Cache blocks can be invalidated via the invalidate_cblocks
+message, which takes an arbitrary number of cblock ranges.  Each cblock
+range's end value is "one past the end", meaning 5-10 expresses a range
+of values from 5 to 9.  Each cblock must be expressed as a decimal
+value; in the future a variant message that takes cblock ranges
+expressed in hexadecimal may be needed to better support efficient
+invalidation of larger caches.  The cache must be in passthrough mode
+when invalidate_cblocks is used.
+
+   invalidate_cblocks [<cblock>|<cblock begin>-<cblock end>]*
+
+E.g.
+   dmsetup message my_cache 0 invalidate_cblocks 2345 3456-4567 5678-6789
+
 Examples
 ========

 The test suite can be found here:

-https://github.com/jthornber/thinp-test-suite
+https://github.com/jthornber/device-mapper-test-suite

 dmsetup create my_cache --table '0 41943040 cache /dev/mapper/metadata \
 	/dev/mapper/ssd /dev/mapper/origin 512 1 writeback default 0'
--- a/doc/kernel/crypt.txt
+++ b/doc/kernel/crypt.txt
@@ -4,12 +4,15 @@ dm-crypt
 Device-Mapper's "crypt" target provides transparent encryption of block devices
 using the kernel crypto API.

+For a more detailed description of supported parameters see:
+https://gitlab.com/cryptsetup/cryptsetup/wikis/DMCrypt
+
 Parameters: <cipher> <key> <iv_offset> <device path> \
 	      <offset> [<#opt_params> <opt_params>]

 <cipher>
    Encryption cipher and an optional IV generation mode.
-    (In format cipher[:keycount]-chainmode-ivopts:ivmode).
+    (In format cipher[:keycount]-chainmode-ivmode[:ivopts]).
    Examples:
       des
       aes-cbc-essiv:sha256
@@ -19,7 +22,11 @@ Parameters: <cipher> <key> <iv_offset> <device path> \

 <key>
    Key used for encryption. It is encoded as a hexadecimal number.
-    You can only use key sizes that are valid for the selected cipher.
+    You can only use key sizes that are valid for the selected cipher
+    in combination with the selected iv mode.
+    Note that for some iv modes the key string can contain additional
+    keys (for example IV seed) so the key contains more parts concatenated
+    into a single string.

 <keycount>
    Multi-key compatibility mode. You can define <keycount> keys and
@@ -44,7 +51,7 @@ Parameters: <cipher> <key> <iv_offset> <device path> \
    Otherwise #opt_params is the number of following arguments.

    Example of optional parameters section:
-        1 allow_discards
+        3 allow_discards same_cpu_crypt submit_from_crypt_cpus

 allow_discards
    Block discard requests (a.k.a. TRIM) are passed through the crypt device.
@@ -56,11 +63,24 @@ allow_discards
    used space etc.) if the discarded blocks can be located easily on the
    device later.

+same_cpu_crypt
+    Perform encryption using the same cpu that IO was submitted on.
+    The default is to use an unbound workqueue so that encryption work
+    is automatically balanced between available CPUs.
+
+submit_from_crypt_cpus
+    Disable offloading writes to a separate thread after encryption.
+    There are some situations where offloading write bios from the
+    encryption threads to a single thread degrades performance
+    significantly.  The default is to offload write bios to the same
+    thread because it benefits CFQ to have writes submitted using the
+    same context.
+
 Example scripts
 ===============
 LUKS (Linux Unified Key Setup) is now the preferred way to set up disk
 encryption with dm-crypt using the 'cryptsetup' utility, see
-http://code.google.com/p/cryptsetup/
+https://gitlab.com/cryptsetup/cryptsetup

 [[
 #!/bin/sh
--- a/doc/kernel/era.txt
+++ b/doc/kernel/era.txt
@@ -0,0 +1,108 @@
+Introduction
+============
+
+dm-era is a target that behaves similarly to the linear target.  In
+addition it keeps track of which blocks were written within a user
+defined period of time called an 'era'.  Each era target instance
+maintains the current era as a monotonically increasing 32-bit
+counter.
+
+Use cases include tracking changed blocks for backup software, and
+partially invalidating the contents of a cache to restore cache
+coherency after rolling back a vendor snapshot.
+
+Constructor
+===========
+
+ era <metadata dev> <origin dev> <block size>
+
+ metadata dev    : fast device holding the persistent metadata
+ origin dev	 : device holding data blocks that may change
+ block size      : block size of origin data device, granularity that is
+		     tracked by the target
+
+Messages
+========
+
+None of the dm messages take any arguments.
+
+checkpoint
+----------
+
+Possibly move to a new era.  You shouldn't assume the era has
+incremented.  After sending this message, you should check the
+current era via the status line.
+
+take_metadata_snap
+------------------
+
+Create a clone of the metadata, to allow a userland process to read it.
+
+drop_metadata_snap
+------------------
+
+Drop the metadata snapshot.
+
+Status
+======
+
+<metadata block size> <#used metadata blocks>/<#total metadata blocks>
+<current era> <held metadata root | '-'>
+
+metadata block size	 : Fixed block size for each metadata block in
+			     sectors
+#used metadata blocks	 : Number of metadata blocks used
+#total metadata blocks	 : Total number of metadata blocks
+current era		 : The current era
+held metadata root	 : The location, in blocks, of the metadata root
+			     that has been 'held' for userspace read
+			     access. '-' indicates there is no held root
+
+Detailed use case
+=================
+
+The scenario of invalidating a cache when rolling back a vendor
+snapshot was the primary use case when developing this target:
+
+Taking a vendor snapshot
+------------------------
+
+- Send a checkpoint message to the era target
+- Make a note of the current era in its status line
+- Take vendor snapshot (the era and snapshot should be forever
+  associated now).
+
+Rolling back to a vendor snapshot
+---------------------------------
+
+- Cache enters passthrough mode (see: dm-cache's docs in cache.txt)
+- Rollback vendor storage
+- Take metadata snapshot
+- Ascertain which blocks have been written since the snapshot was taken
+  by checking each block's era
+- Invalidate those blocks in the caching software
+- Cache returns to writeback/writethrough mode
+
+Memory usage
+============
+
+The target uses a bitset to record writes in the current era.  It also
+has a spare bitset ready for switching over to a new era.  Other than
+that it uses a few 4k blocks for updating metadata.
+
+   (4 * nr_blocks) bytes + buffers
+
+Resilience
+==========
+
+Metadata is updated on disk before a write to a previously unwritten
+block is performed.  As such dm-era should not be affected by a hard
+crash such as power failure.
+
+Userland tools
+==============
+
+Userland tools are found in the increasingly poorly named
+thin-provisioning-tools project:
+
+    https://github.com/jthornber/thin-provisioning-tools
--- a/doc/kernel/log-writes.txt
+++ b/doc/kernel/log-writes.txt
@@ -0,0 +1,140 @@
+dm-log-writes
+=============
+
+This target takes 2 devices, one to pass all IO to normally, and one to log all
+of the write operations to.  This is intended for file system developers wishing
+to verify the integrity of metadata or data as the file system is written to.
+There is a log_write_entry written for every WRITE request and the target is
+able to take arbitrary data from userspace to insert into the log.  The data
+that is in the WRITE requests is copied into the log to make the replay happen
+exactly as it happened originally.
+
+Log Ordering
+============
+
+We log things in order of completion once we are sure the write is no longer in
+cache.  This means that normal WRITE requests are not actually logged until the
+next REQ_FLUSH request.  This is to make it easier for userspace to replay the
+log in a way that correlates to what is on disk and not what is in cache, to
+make it easier to detect improper waiting/flushing.
+
+This works by attaching all WRITE requests to a list once the write completes.
+Once we see a REQ_FLUSH request we splice this list onto the request and once
+the FLUSH request completes we log all of the WRITEs and then the FLUSH.  Only
+completed WRITEs, at the time the REQ_FLUSH is issued, are added in order to
+simulate the worst case scenario with regard to power failures.  Consider the
+following example (W means write, C means complete):
+
+W1,W2,W3,C3,C2,Wflush,C1,Cflush
+
+The log would show the following
+
+W3,W2,flush,W1....
+
+Again this is to simulate what is actually on disk; this allows us to detect
+cases where a power failure at a particular point in time would create an
+inconsistent file system.
+
+Any REQ_FUA requests bypass this flushing mechanism and are logged as soon as
+they complete as those requests will obviously bypass the device cache.
+
+Any REQ_DISCARD requests are treated like WRITE requests.  Otherwise we would
+have all the DISCARD requests, and then the WRITE requests and then the FLUSH
+request.  Consider the following example:
+
+WRITE block 1, DISCARD block 1, FLUSH
+
+If we logged DISCARD when it completed, the replay would look like this
+
+DISCARD 1, WRITE 1, FLUSH
+
+which isn't quite what happened and wouldn't be caught during the log replay.
+
+Target interface
+================
+
+i) Constructor
+
+   log-writes <dev_path> <log_dev_path>
+
+   dev_path	: Device that all of the IO will go to normally.
+   log_dev_path : Device where the log entries are written to.
+
+ii) Status
+
+    <#logged entries> <highest allocated sector>
+
+    #logged entries	       : Number of logged entries
+    highest allocated sector   : Highest allocated sector
+
+iii) Messages
+
+    mark <description>
+
+	You can use a dmsetup message to set an arbitrary mark in a log.
+	For example say you want to fsck a file system after every
+	write, but first you need to replay up to the mkfs to make sure
+	we're fsck'ing something reasonable, you would do something like
+	this:
+
+	  mkfs.btrfs -f /dev/mapper/log
+	  dmsetup message log 0 mark mkfs
+	  <run test>
+
+	  This would allow you to replay the log up to the mkfs mark and
+	  then replay from that point on doing the fsck check in the
+	  interval that you want.
+
+	Every log has a mark at the end labeled "dm-log-writes-end".
+
+Userspace component
+===================
+
+There is a userspace tool that will replay the log for you in various ways.
+It can be found here: https://github.com/josefbacik/log-writes
+
+Example usage
+=============
+
+Say you want to test fsync on your file system.  You would do something like
+this:
+
+TABLE="0 $(blockdev --getsz /dev/sdb) log-writes /dev/sdb /dev/sdc"
+dmsetup create log --table "$TABLE"
+mkfs.btrfs -f /dev/mapper/log
+dmsetup message log 0 mark mkfs
+
+mount /dev/mapper/log /mnt/btrfs-test
+<some test that does fsync at the end>
+dmsetup message log 0 mark fsync
+md5sum /mnt/btrfs-test/foo
+umount /mnt/btrfs-test
+
+dmsetup remove log
+replay-log --log /dev/sdc --replay /dev/sdb --end-mark fsync
+mount /dev/sdb /mnt/btrfs-test
+md5sum /mnt/btrfs-test/foo
+<verify md5sum's are correct>
+
+Another option is to do a complicated file system operation and verify the file
+system is consistent during the entire operation.  You could do this with:
+
+TABLE="0 $(blockdev --getsz /dev/sdb) log-writes /dev/sdb /dev/sdc"
+dmsetup create log --table "$TABLE"
+mkfs.btrfs -f /dev/mapper/log
+dmsetup message log 0 mark mkfs
+
+mount /dev/mapper/log /mnt/btrfs-test
+<fsstress to dirty the fs>
+btrfs filesystem balance /mnt/btrfs-test
+umount /mnt/btrfs-test
+dmsetup remove log
+
+replay-log --log /dev/sdc --replay /dev/sdb --end-mark mkfs
+btrfsck /dev/sdb
+replay-log --log /dev/sdc --replay /dev/sdb --start-mark mkfs \
+	--fsck "btrfsck /dev/sdb" --check fua
+
+And that will replay the log until it sees a FUA request, run the fsck command
+and if the fsck passes it will replay to the next FUA, until it is completed or
+the fsck command exits abnormally.
--- a/doc/kernel/raid.txt
+++ b/doc/kernel/raid.txt
@@ -222,3 +222,5 @@ Version History
 1.4.2   Add RAID10 "far" and "offset" algorithm support.
 1.5.0   Add message interface to allow manipulation of the sync_action.
 	New status (STATUSTYPE_INFO) fields: sync_action and mismatch_cnt.
+1.5.1   Add ability to restore transiently failed devices on resume.
+1.5.2   'mismatch_cnt' is zero unless [last_]sync_action is "check".
--- a/doc/kernel/statistics.txt
+++ b/doc/kernel/statistics.txt
@@ -0,0 +1,186 @@
+DM statistics
+=============
+
+Device Mapper supports the collection of I/O statistics on user-defined
+regions of a DM device.	 If no regions are defined no statistics are
+collected so there isn't any performance impact.  Only bio-based DM
+devices are currently supported.
+
+Each user-defined region specifies a starting sector, length and step.
+Individual statistics will be collected for each step-sized area within
+the range specified.
+
+The I/O statistics counters for each step-sized area of a region are
+in the same format as /sys/block/*/stat or /proc/diskstats (see:
+Documentation/iostats.txt).  But two extra counters (12 and 13) are
+provided: total time spent reading and writing in milliseconds.	 All
+these counters may be accessed by sending the @stats_print message to
+the appropriate DM device via dmsetup.
+
+Each region has a corresponding unique identifier, which we call a
+region_id, that is assigned when the region is created.	 The region_id
+must be supplied when querying statistics about the region, deleting the
+region, etc.  Unique region_ids enable multiple userspace programs to
+request and process statistics for the same DM device without stepping
+on each other's data.
+
+The creation of DM statistics will allocate memory via kmalloc or
+fallback to using vmalloc space.  At most, 1/4 of the overall system
+memory may be allocated by DM statistics.  The admin can see how much
+memory is used by reading
+/sys/module/dm_mod/parameters/stats_current_allocated_bytes
+
+Messages
+========
+
+    @stats_create <range> <step> [<program_id> [<aux_data>]]
+
+	Create a new region and return the region_id.
+
+	<range>
+	  "-" - whole device
+	  "<start_sector>+<length>" - a range of <length> 512-byte sectors
+				      starting with <start_sector>.
+
+	<step>
+	  "<area_size>" - the range is subdivided into areas each containing
+			  <area_size> sectors.
+	  "/<number_of_areas>" - the range is subdivided into the specified
+				 number of areas.
+
+	<program_id>
+	  An optional parameter.  A name that uniquely identifies
+	  the userspace owner of the range.  This groups ranges together
+	  so that userspace programs can identify the ranges they
+	  created and ignore those created by others.
+	  The kernel returns this string back in the output of
+	  @stats_list message, but it doesn't use it for anything else.
+
+	<aux_data>
+	  An optional parameter.  A word that provides auxiliary data
+	  that is useful to the client program that created the range.
+	  The kernel returns this string back in the output of
+	  @stats_list message, but it doesn't use this value for anything.
+
+    @stats_delete <region_id>
+
+	Delete the region with the specified id.
+
+	<region_id>
+	  region_id returned from @stats_create
+
+    @stats_clear <region_id>
+
+	Clear all the counters except the in-flight i/o counters.
+
+	<region_id>
+	  region_id returned from @stats_create
+
+    @stats_list [<program_id>]
+
+	List all regions registered with @stats_create.
+
+	<program_id>
+	  An optional parameter.
+	  If this parameter is specified, only matching regions
+	  are returned.
+	  If it is not specified, all regions are returned.
+
+	Output format:
+	  <region_id>: <start_sector>+<length> <step> <program_id> <aux_data>
+
+    @stats_print <region_id> [<starting_line> <number_of_lines>]
+
+	Print counters for each step-sized area of a region.
+
+	<region_id>
+	  region_id returned from @stats_create
+
+	<starting_line>
+	  The index of the starting line in the output.
+	  If omitted, all lines are returned.
+
+	<number_of_lines>
+	  The number of lines to include in the output.
+	  If omitted, all lines are returned.
+
+	Output format for each step-sized area of a region:
+
+	  <start_sector>+<length> counters
+
+	  The first 11 counters have the same meaning as
+	  /sys/block/*/stat or /proc/diskstats.
+
+	  Please refer to Documentation/iostats.txt for details.
+
+	  1. the number of reads completed
+	  2. the number of reads merged
+	  3. the number of sectors read
+	  4. the number of milliseconds spent reading
+	  5. the number of writes completed
+	  6. the number of writes merged
+	  7. the number of sectors written
+	  8. the number of milliseconds spent writing
+	  9. the number of I/Os currently in progress
+	  10. the number of milliseconds spent doing I/Os
+	  11. the weighted number of milliseconds spent doing I/Os
+
+	  Additional counters:
+	  12. the total time spent reading in milliseconds
+	  13. the total time spent writing in milliseconds
+
+    @stats_print_clear <region_id> [<starting_line> <number_of_lines>]
+
+	Atomically print and then clear all the counters except the
+	in-flight i/o counters.	 Useful when the client consuming the
+	statistics does not want to lose any statistics (those updated
+	between printing and clearing).
+
+	<region_id>
+	  region_id returned from @stats_create
+
+	<starting_line>
+	  The index of the starting line in the output.
+	  If omitted, all lines are printed and then cleared.
+
+	<number_of_lines>
+	  The number of lines to process.
+	  If omitted, all lines are printed and then cleared.
+
+    @stats_set_aux <region_id> <aux_data>
+
+	Store auxiliary data aux_data for the specified region.
+
+	<region_id>
+	  region_id returned from @stats_create
+
+	<aux_data>
+	  The string that identifies data which is useful to the client
+	  program that created the range.  The kernel returns this
+	  string back in the output of @stats_list message, but it
+	  doesn't use this value for anything.
+
+Examples
+========
+
+Subdivide the DM device 'vol' into 100 pieces and start collecting
+statistics on them:
+
+  dmsetup message vol 0 @stats_create - /100
+
+Set the auxiliary data string to "foo bar baz" (the escape for each
+space must also be escaped, otherwise the shell will consume them):
+
+  dmsetup message vol 0 @stats_set_aux 0 foo\\ bar\\ baz
+
+List the statistics:
+
+  dmsetup message vol 0 @stats_list
+
+Print the statistics:
+
+  dmsetup message vol 0 @stats_print 0
+
+Delete the statistics:
+
+  dmsetup message vol 0 @stats_delete 0
--- a/doc/kernel/switch.txt
+++ b/doc/kernel/switch.txt
@@ -0,0 +1,138 @@
+dm-switch
+=========
+
+The device-mapper switch target creates a device that supports an
+arbitrary mapping of fixed-size regions of I/O across a fixed set of
+paths.  The path used for any specific region can be switched
+dynamically by sending the target a message.
+
+It maps I/O to underlying block devices efficiently when there is a large
+number of fixed-sized address regions but there is no simple pattern
+that would allow for a compact representation of the mapping such as
+dm-stripe.
+
+Background
+----------
+
+Dell EqualLogic and some other iSCSI storage arrays use a distributed
+frameless architecture.  In this architecture, the storage group
+consists of a number of distinct storage arrays ("members") each having
+independent controllers, disk storage and network adapters.  When a LUN
+is created it is spread across multiple members.  The details of the
+spreading are hidden from initiators connected to this storage system.
+The storage group exposes a single target discovery portal, no matter
+how many members are being used.  When iSCSI sessions are created, each
+session is connected to an eth port on a single member.  Data to a LUN
+can be sent on any iSCSI session, and if the blocks being accessed are
+stored on another member the I/O will be forwarded as required.  This
+forwarding is invisible to the initiator.  The storage layout is also
+dynamic, and the blocks stored on disk may be moved from member to
+member as needed to balance the load.
+
+This architecture simplifies the management and configuration of both
+the storage group and initiators.  In a multipathing configuration, it
+is possible to set up multiple iSCSI sessions to use multiple network
+interfaces on both the host and target to take advantage of the
+increased network bandwidth.  An initiator could use a simple round
+robin algorithm to send I/O across all paths and let the storage array
+members forward it as necessary, but there is a performance advantage to
+sending data directly to the correct member.
+
+A device-mapper table already lets you map different regions of a
+device onto different targets.  However in this architecture the LUN is
+spread with an address region size on the order of 10s of MBs, which
+means the resulting table could have more than a million entries and
+consume far too much memory.
+
+Using this device-mapper switch target we can now build a two-layer
+device hierarchy:
+
+    Upper Tier - Determine which array member the I/O should be sent to.
+    Lower Tier - Load balance amongst paths to a particular member.
+
+The lower tier consists of a single dm multipath device for each member.
+Each of these multipath devices contains the set of paths directly to
+the array member in one priority group, and leverages existing path
+selectors to load balance amongst these paths.  We also build a
+non-preferred priority group containing paths to other array members for
+failover reasons.
+
+The upper tier consists of a single dm-switch device.  This device uses
+a bitmap to look up the location of the I/O and choose the appropriate
+lower tier device to route the I/O.  By using a bitmap we are able to
+use 4 bits for each address range in a 16 member group (which is very
+large for us).  This is a much denser representation than the dm table
+b-tree can achieve.
+
+Construction Parameters
+=======================
+
+    <num_paths> <region_size> <num_optional_args> [<optional_args>...]
+    [<dev_path> <offset>]+
+
+<num_paths>
+    The number of paths across which to distribute the I/O.
+
+<region_size>
+    The number of 512-byte sectors in a region. Each region can be redirected
+    to any of the available paths.
+
+<num_optional_args>
+    The number of optional arguments. Currently, no optional arguments
+    are supported and so this must be zero.
+
+<dev_path>
+    The block device that represents a specific path to the device.
+
+<offset>
+    The offset of the start of data on the specific <dev_path> (in units
+    of 512-byte sectors). This number is added to the sector number when
+    forwarding the request to the specific path. Typically it is zero.
+
+Messages
+========
+
+set_region_mappings <index>:<path_nr> [<index>]:<path_nr> [<index>]:<path_nr>...
+
+Modify the region table by specifying which regions are redirected to
+which paths.
+
+<index>
+    The region number (region size was specified in constructor parameters).
+    If index is omitted, the next region (previous index + 1) is used.
+    Expressed in hexadecimal (WITHOUT any prefix like 0x).
+
+<path_nr>
+    The path number in the range 0 ... (<num_paths> - 1).
+    Expressed in hexadecimal (WITHOUT any prefix like 0x).
+
+R<n>,<m>
+    This parameter allows repetitive patterns to be loaded quickly. <n> and <m>
+    are hexadecimal numbers. The last <n> mappings are repeated in the next <m>
+    slots.
+
+Status
+======
+
+No status line is reported.
+
+Example
+=======
+
+Assume that you have volumes vg1/switch0 vg1/switch1 vg1/switch2 with
+the same size.
+
+Create a switch device with 64kB region size:
+    dmsetup create switch --table "0 `blockdev --getsize /dev/vg1/switch0`
+	switch 3 128 0 /dev/vg1/switch0 0 /dev/vg1/switch1 0 /dev/vg1/switch2 0"
+
+Set mappings for the first 7 entries to point to devices switch0, switch1,
+switch2, switch0, switch1, switch2, switch1:
+    dmsetup message switch 0 set_region_mappings 0:0 :1 :2 :0 :1 :2 :1
+
+Set repetitive mapping. This command:
+    dmsetup message switch 0 set_region_mappings 1000:1 :2 R2,10
+is equivalent to:
+    dmsetup message switch 0 set_region_mappings 1000:1 :2 :1 :2 :1 :2 :1 :2 \
+	:1 :2 :1 :2 :1 :2 :1 :2 :1 :2
+
--- a/doc/kernel/thin-provisioning.txt
+++ b/doc/kernel/thin-provisioning.txt
@@ -99,13 +99,14 @@ Using an existing pool device
 		 $data_block_size $low_water_mark"

 $data_block_size gives the smallest unit of disk space that can be
-allocated at a time expressed in units of 512-byte sectors.  People
-primarily interested in thin provisioning may want to use a value such
-as 1024 (512KB).  People doing lots of snapshotting may want a smaller value
-such as 128 (64KB).  If you are not zeroing newly-allocated data,
-a larger $data_block_size in the region of 256000 (128MB) is suggested.
-$data_block_size must be the same for the lifetime of the
-metadata device.
+allocated at a time expressed in units of 512-byte sectors.
+$data_block_size must be between 128 (64KB) and 2097152 (1GB) and a
+multiple of 128 (64KB).  $data_block_size cannot be changed after the
+thin-pool is created.  People primarily interested in thin provisioning
+may want to use a value such as 1024 (512KB).  People doing lots of
+snapshotting may want a smaller value such as 128 (64KB).  If you are
+not zeroing newly-allocated data, a larger $data_block_size in the
+region of 256000 (128MB) is suggested.

 $low_water_mark is expressed in blocks of size $data_block_size.  If
 free space on the data device drops below this level then a dm event
@@ -115,6 +116,35 @@ Resuming a device with a new table itself triggers an event so the
 userspace daemon can use this to detect a situation where a new table
 already exceeds the threshold.

+A low water mark for the metadata device is maintained in the kernel and
+will trigger a dm event if free space on the metadata device drops below
+it.
+
+Updating on-disk metadata
+-------------------------
+
+On-disk metadata is committed every time a FLUSH or FUA bio is written.
+If no such requests are made then commits will occur every second.  This
+means the thin-provisioning target behaves like a physical disk that has
+a volatile write cache.  If power is lost you may lose some recent
+writes.  The metadata should always be consistent in spite of any crash.
+
+If data space is exhausted the pool will either error or queue IO
+according to the configuration (see: error_if_no_space).  If metadata
+space is exhausted or a metadata operation fails: the pool will error IO
+until the pool is taken offline and repair is performed to 1) fix any
+potential inconsistencies and 2) clear the flag that imposes repair.
+Once the pool's metadata device is repaired it may be resized, which
+will allow the pool to return to normal operation.  Note that if a pool
+is flagged as needing repair, the pool's data and metadata devices
+cannot be resized until repair is performed.  It should also be noted
+that when the pool's metadata space is exhausted the current metadata
+transaction is aborted.  Given that the pool will cache IO whose
+completion may have already been acknowledged to upper IO layers
+(e.g. filesystem) it is strongly suggested that consistency checks
+(e.g. fsck) be performed on those layers when repair of the pool is
+required.
+
 Thin provisioning
 -----------------

@@ -234,6 +264,8 @@ i) Constructor
      read_only: Don't allow any changes to be made to the pool
 		 metadata.

+      error_if_no_space: Error IOs, instead of queueing, if no space.
+
    Data block size must be between 64KB (128 sectors) and 1GB
    (2097152 sectors) inclusive.

@@ -255,10 +287,9 @@ ii) Status
 	should register for the event and then check the target's status.

    held metadata root:
-	The location, in sectors, of the metadata root that has been
+	The location, in blocks, of the metadata root that has been
 	'held' for userspace read access.  '-' indicates there is no
-	held root.  This feature is not yet implemented so '-' is
-	always returned.
+	held root.

    discard_passdown|no_discard_passdown
 	Whether or not discards are actually being passed down to the
@@ -275,6 +306,14 @@ ii) Status
 	contain the string 'Fail'.  The userspace recovery tools
 	should then be used.

+    error_if_no_space|queue_if_no_space
+	If the pool runs out of data or metadata space, the pool will
+	either queue or error the IO destined to the data device.  The
+	default is to queue the IO until more space is added or the
+	'no_space_timeout' expires.  The 'no_space_timeout' dm-thin-pool
+	module parameter can be used to change this timeout -- it
+	defaults to 60 seconds but may be disabled using a value of 0.
+
 iii) Messages

    create_thin <dev id>
@@ -341,9 +380,6 @@ then you'll have no access to blocks mapped beyond the end.  If you
 load a target that is bigger than before, then extra blocks will be
 provisioned as and when needed.

-If you wish to reduce the size of your thin device and potentially
-regain some space then send the 'trim' message to the pool.
-
 ii) Status

     <nr mapped sectors> <highest mapped sector>
--- a/doc/kernel/verity.txt
+++ b/doc/kernel/verity.txt
@@ -11,6 +11,7 @@ Construction Parameters
    <data_block_size> <hash_block_size>
    <num_data_blocks> <hash_start_block>
    <algorithm> <digest> <salt>
+    [<#opt_params> <opt_params>]

 <version>
    This is the type of the on-disk hash format.
@@ -62,6 +63,22 @@ Construction Parameters
 <salt>
    The hexadecimal encoding of the salt value.

+<#opt_params>
+    Number of optional parameters. If there are no optional parameters,
+    the optional parameters section can be skipped or #opt_params can be zero.
+    Otherwise #opt_params is the number of following arguments.
+
+    Example of optional parameters section:
+        1 ignore_corruption
+
+ignore_corruption
+    Log corrupted blocks, but allow read operations to proceed normally.
+
+restart_on_corruption
+    Restart the system when a corrupted block is discovered. This option is
+    not compatible with ignore_corruption and requires user space support to
+    avoid restart loops.
+
 Theory of operation
 ===================

@@ -125,7 +142,7 @@ block boundary) are the hash blocks which are stored a depth at a time

 The full specification of kernel parameters and on-disk metadata format
 is available at the cryptsetup project's wiki page
-  http://code.google.com/p/cryptsetup/wiki/DMVerity
+  https://gitlab.com/cryptsetup/cryptsetup/wikis/DMVerity

 Status
 ======
@@ -142,7 +159,7 @@ Set up a device:

 A command line tool veritysetup is available to compute or verify
 the hash tree or activate the kernel device. This is available from
-the cryptsetup upstream repository http://code.google.com/p/cryptsetup/
+the cryptsetup upstream repository https://gitlab.com/cryptsetup/cryptsetup/
 (as a libcryptsetup extension).

 Create hash on the device:
--- a/doc/lvmetad_design.txt
+++ b/doc/lvmetad_design.txt
@@ -137,6 +137,17 @@ hosts. Overall, this is not hard, but the devil is in the details. I would
 possibly disable lvmetad for clustered volume groups in the first phase and
 only proceed when the local mode is robust and well tested.

+With lvmlockd, lvmetad state is kept up to date by flagging either an
+individual VG as "invalid", or the global state as "invalid".  When either
+the VG or the global state are read, this invalid flag is returned along
+with the data.  The client command can check for this invalid state and
+decide to read the information from disk rather than use the stale cached
+data.  After the latest data is read from disk, the command may choose to
+send it to lvmetad to update the cache.  lvmlockd uses version numbers
+embedded in its VG and global locks to detect when cached data becomes
+invalid, and it then tells lvmetad to set the related invalid flag.
+dct, 2015-06-23
+
 Protocol & co.
 --------------

--- a/doc/lvmpolld_overview.txt
+++ b/doc/lvmpolld_overview.txt
@@ -0,0 +1,81 @@
+LVM poll daemon overview
+========================
+
+(last updated: 2015-05-09)
+
+LVM poll daemon (lvmpolld) is an alternative to the classical lvm2 polling
+mechanisms. The motivation behind lvmpolld was to create a persistent
+system service that would be more durable and transparent. It is particularly
+suited to any systemd-enabled distribution.
+
+Before lvmpolld any background polling process originating in a lvm2 command
+initiated inside cgroup of a systemd service could get killed if the main
+process (service) exited in such cgroup. That could lead to premature termination
+of such lvm2 polling process.
+
+Also, without lvmpolld there was no way to detect that a polling process
+monitoring a specific operation is already in progress, in which case it is
+not desirable to start another one for exactly the same task. lvmpolld is able to
+detect such duplicate requests and avoids spawning a redundant process.
+
+lvmpolld is primarily targeted for systems with systemd as init process. For systems
+without systemd there's no need to install lvmpolld because there is no issue
+with observation described in second paragraph. You can still benefit from
+avoiding duplicate polling process being spawned, but without systemd lvmpolld
+can't easily be run on-demand (activated by a socket maintained by systemd).
+
+lvmpolld implements shutdown on idle and can shut down automatically after being
+idle for a requested time. 60 seconds is the recommended default. This behaviour
+can be turned off if it is not wanted.
+
+Data structures
+---------------
+
+a) Logical Volume (struct lvmpolld_lv)
+
+Each operation is identified by LV. Internal identifier within lvmpolld
+is full LV uuid (vg_uuid+lv_uuid) prefixed with LVM_SYSTEM_DIR if set by client.
+
+such full identifier may look like:
+
+  "/etc/lvm/lvm.confWFd2dU67S8Av29IcJCnYzqQirdfElnxzhCdzEh7EJrfCn9R1TIQjIj58weUZDre4"
+
+or without LVM_SYSTEM_DIR being set explicitly:
+
+  "WFd2dU67S8Av29IcJCnYzqQirdfElnxzhCdzEh7EJrfCn9R1TIQjIj58weUZDre4"
+
+
+LV carries various metadata about polling operation. The most significant are:
+
+VG name
+LV name
+polling interval (usually --interval passed to lvm2 command or default from lvm2 
+		  configuration)
+operation type (one of: pvmove, convert, merge, thin_merge)
+LVM_SYSTEM_DIR (if set, this is also passed among environment variables of lvpoll
+		command spawned by lvmpolld)
+
+b) LV stores (struct lvmpolld_store)
+
+lvmpolld uses two stores for Logical volumes (struct lvmpolld_lv). One store for polling
+operations in-progress. These operations are as of now: PV move, mirror up-conversion,
+classical snapshot merge, thin snapshot merge.
+
+The second store is used only for pvmove --abort operations in progress. Both
+stores are independent, and identical LVs (pvmove /dev/sda3 and pvmove --abort /dev/sda3)
+can be run concurrently from lvmpolld's point of view (on the lvm2 side, consistency is
+guaranteed by the lvm2 locking mechanism).
+
+Locking order
+-------------
+
+There are two types of locks in lvmpolld. Each store has own store lock and each LV has
+own lv lock.
+
+Locking order is:
+1) store lock
+2) LV lock
+
+Each LV has to be inside a store. When daemon requires to take both locks it has
+to take a store lock first and LV lock has to be taken afterwards (after the
+appropriate store lock where the LV is being stored :))
--- a/include/.symlinks.in
+++ b/include/.symlinks.in
@@ -1,11 +1,15 @@
@top_srcdir@/daemons/clvmd/clvm.h
@top_srcdir@/daemons/dmeventd/libdevmapper-event.h
@top_srcdir@/daemons/lvmetad/lvmetad-client.h
+@top_srcdir@/daemons/lvmpolld/lvmpolld-protocol.h
+@top_srcdir@/daemons/lvmpolld/polling_ops.h
+@top_srcdir@/daemons/lvmlockd/lvmlockd-client.h
@top_srcdir@/liblvm/lvm2app.h
@top_srcdir@/lib/activate/activate.h
@top_srcdir@/lib/activate/targets.h
@top_srcdir@/lib/cache/lvmcache.h
@top_srcdir@/lib/cache/lvmetad.h
+@top_srcdir@/lib/locking/lvmlockd.h
@top_srcdir@/lib/commands/toolcontext.h
@top_srcdir@/lib/config/config.h
@top_srcdir@/lib/config/config_settings.h
@@ -13,6 +17,7 @@
@top_srcdir@/lib/datastruct/btree.h
@top_srcdir@/lib/datastruct/str_list.h
@top_srcdir@/lib/device/dev-cache.h
+@top_srcdir@/lib/device/dev-ext-udev-constants.h
@top_srcdir@/lib/device/dev-type.h
@top_srcdir@/lib/device/device.h
@top_srcdir@/lib/device/device-types.h
@@ -28,6 +33,8 @@
@top_srcdir@/lib/locking/locking.h
@top_srcdir@/lib/log/log.h
@top_srcdir@/lib/log/lvm-logging.h
+@top_srcdir@/lib/lvmpolld/lvmpolld-client.h
+@top_srcdir@/lib/lvmpolld/polldaemon.h
@top_srcdir@/lib/metadata/lv.h
@top_srcdir@/lib/metadata/lv_alloc.h
@top_srcdir@/lib/metadata/metadata.h
@@ -69,3 +76,4 @@
@top_srcdir@/libdm/misc/kdev_t.h
@top_srcdir@/po/pogen.h
@top_srcdir@/tools/lvm2cmd.h
+@top_srcdir@/tools/tool.h
--- a/include/Makefile.in
+++ b/include/Makefile.in
@@ -20,8 +20,12 @@ include $(top_builddir)/make.tmpl

 all: .symlinks_created

-.symlinks_created: .symlinks 
-	find . -maxdepth 1 -type l -exec $(RM) \{\} \;
+LINKS := $(shell find . -maxdepth 1 -type l)
+
+.symlinks_created: .symlinks
+ifneq (,$(firstword $(LINKS)))
+	$(RM) $(LINKS)
+endif
 	for i in `cat $<`; do $(LN_S) $$i ; done
 	touch $@

@@ -31,5 +35,5 @@ device-mapper: all

 cflow: all

-DISTCLEAN_TARGETS += $(shell find . -maxdepth 1 -type l)
-DISTCLEAN_TARGETS += .include_symlinks .symlinks_created .symlinks
+DISTCLEAN_TARGETS += .symlinks
+CLEAN_TARGETS += $(LINKS) .include_symlinks .symlinks_created
--- a/lib/Makefile.in
+++ b/lib/Makefile.in
@@ -56,6 +56,7 @@ SOURCES =\
 	datastruct/btree.c \
 	datastruct/str_list.c \
 	device/dev-cache.c \
+	device/dev-ext.c \
 	device/dev-io.c \
 	device/dev-md.c \
 	device/dev-swap.c \
@@ -69,9 +70,11 @@ SOURCES =\
 	filters/filter-regex.c \
 	filters/filter-sysfs.c \
 	filters/filter-md.c \
+	filters/filter-fwraid.c \
 	filters/filter-mpath.c \
 	filters/filter-partitioned.c \
 	filters/filter-type.c \
+	filters/filter-usable.c \
 	format_text/archive.c \
 	format_text/archiver.c \
 	format_text/export.c \
@@ -79,7 +82,6 @@ SOURCES =\
 	format_text/format-text.c \
 	format_text/import.c \
 	format_text/import_vsn1.c \
-	format_text/tags.c \
 	format_text/text_label.c \
 	freeseg/freeseg.c \
 	label/label.c \
@@ -120,11 +122,6 @@ SOURCES =\
 	uuid/uuid.c \
 	zero/zero.c

-ifeq ("@HAVE_REALTIME@", "yes")
-  SOURCES +=\
-	misc/timestamp.c
-endif
-
 ifeq ("@LVM1@", "internal")
  SOURCES +=\
 	format1/disk-rep.c \
@@ -193,6 +190,16 @@ ifeq ("@BUILD_LVMETAD@", "yes")
 	cache/lvmetad.c
 endif

+ifeq ("@BUILD_LVMPOLLD@", "yes")
+  SOURCES +=\
+	lvmpolld/lvmpolld-client.c
+endif
+
+ifeq ("@BUILD_LVMLOCKD@", "yes")
+  SOURCES +=\
+	locking/lvmlockd.c
+endif
+
 ifeq ("@DMEVENTD@", "yes")
  CLDFLAGS += -L$(top_builddir)/daemons/dmeventd
  LIBS += -ldevmapper-event
@@ -219,7 +226,7 @@ CFLOW_LIST_TARGET = $(LIB_NAME).cflow

 include $(top_builddir)/make.tmpl

-CFLAGS += $(BLKID_CFLAGS) $(UDEV_CFLAGS)
+CFLAGS += $(BLKID_CFLAGS) $(UDEV_CFLAGS) $(VALGRIND_CFLAGS)

 $(SUBDIRS): $(LIB_STATIC)

--- a/lib/activate/activate.c
+++ b/lib/activate/activate.c
--- a/lib/activate/activate.h
+++ b/lib/activate/activate.h
@@ -30,6 +30,37 @@ struct lvinfo {
 	uint32_t read_ahead;
 };

+typedef enum {
+	SEG_STATUS_NONE,
+	SEG_STATUS_CACHE,
+	SEG_STATUS_RAID,
+	SEG_STATUS_SNAPSHOT,
+	SEG_STATUS_THIN,
+	SEG_STATUS_THIN_POOL,
+	SEG_STATUS_UNKNOWN
+} lv_seg_status_type_t;
+
+struct lv_seg_status {
+	struct dm_pool *mem;			/* input */
+	const struct lv_segment *seg;		/* input */
+	lv_seg_status_type_t type;		/* output */
+	union {					/* NOTE(review): presumably the member matching 'type' is the one filled in - confirm */
+		struct dm_status_cache *cache;
+		struct dm_status_raid *raid;
+		struct dm_status_snapshot *snapshot;
+		struct dm_status_thin *thin;
+		struct dm_status_thin_pool *thin_pool;
+	};
+};
+
+struct lv_with_info_and_seg_status {
+	const struct logical_volume *lv;	/* input */
+	int info_ok;
+	struct lvinfo info;			/* output */
+	int seg_part_of_lv;			/* output */
+	struct lv_seg_status seg_status;	/* input/output, see lv_seg_status */
+};
+
 struct lv_activate_opts {
 	int exclusive;
 	int origin_only;
@@ -74,34 +105,54 @@ void activation_release(void);
 void activation_exit(void);

 /* int lv_suspend(struct cmd_context *cmd, const char *lvid_s); */
-int lv_suspend_if_active(struct cmd_context *cmd, const char *lvid_s, unsigned origin_only, unsigned exclusive, struct logical_volume *lv_ondisk, struct logical_volume *lv_incore);
-int lv_resume(struct cmd_context *cmd, const char *lvid_s, unsigned origin_only, struct logical_volume *lv);
+int lv_suspend_if_active(struct cmd_context *cmd, const char *lvid_s, unsigned origin_only, unsigned exclusive,
+			 const struct logical_volume *lv_ondisk, const struct logical_volume *lv_incore);
+int lv_resume(struct cmd_context *cmd, const char *lvid_s, unsigned origin_only, const struct logical_volume *lv);
 int lv_resume_if_active(struct cmd_context *cmd, const char *lvid_s,
-			unsigned origin_only, unsigned exclusive, unsigned revert, struct logical_volume *lv);
+			unsigned origin_only, unsigned exclusive, unsigned revert, const struct logical_volume *lv);
 int lv_activate(struct cmd_context *cmd, const char *lvid_s, int exclusive,
-		int noscan, int temporary, struct logical_volume *lv);
+		int noscan, int temporary, const struct logical_volume *lv);
 int lv_activate_with_filter(struct cmd_context *cmd, const char *lvid_s, int exclusive,
-			    int noscan, int temporary, struct logical_volume *lv);
-int lv_deactivate(struct cmd_context *cmd, const char *lvid_s, struct logical_volume *lv);
+			    int noscan, int temporary, const struct logical_volume *lv);
+int lv_deactivate(struct cmd_context *cmd, const char *lvid_s, const struct logical_volume *lv);

 int lv_mknodes(struct cmd_context *cmd, const struct logical_volume *lv);

 /*
- * Returns 1 if info structure has been populated, else 0.
+ * Returns 1 if info structure has been populated, else 0 on failure.
+ * When lvinfo* is NULL, it returns 1 if the device is locally active, 0 otherwise.
 */
 int lv_info(struct cmd_context *cmd, const struct logical_volume *lv, int use_layer,
 	    struct lvinfo *info, int with_open_count, int with_read_ahead);
 int lv_info_by_lvid(struct cmd_context *cmd, const char *lvid_s, int use_layer,
 		    struct lvinfo *info, int with_open_count, int with_read_ahead);

-int lv_check_not_in_use(struct cmd_context *cmd, struct logical_volume *lv,
-			struct lvinfo *info);
+/*
+ * Returns 1 if lv_seg_status structure has been populated,
+ * else 0 on failure or if device not active locally.
+ */
+int lv_status(struct cmd_context *cmd, const struct lv_segment *lv_seg,
+	      int use_layer, struct lv_seg_status *lv_seg_status);
+
+/*
+ * Returns 1 if lv_info_and_seg_status structure has been populated,
+ * else 0 on failure or if device not active locally.
+ *
+ * lv_info_with_seg_status is the same as calling lv_info and then lv_status,
+ * but this fn tries to do that with one ioctl if possible.
+ */
+int lv_info_with_seg_status(struct cmd_context *cmd, const struct logical_volume *lv,
+			    const struct lv_segment *lv_seg, int use_layer,
+			    struct lv_with_info_and_seg_status *status,
+			    int with_open_count, int with_read_ahead);
+
+int lv_check_not_in_use(const struct logical_volume *lv);

 /*
 * Returns 1 if activate_lv has been set: 1 = activate; 0 = don't.
 */
 int lv_activation_filter(struct cmd_context *cmd, const char *lvid_s,
-			 int *activate_lv, struct logical_volume *lv);
+			 int *activate_lv, const struct logical_volume *lv);
 /*
 * Checks against the auto_activation_volume_list and
 * returns 1 if the LV should be activated, 0 otherwise.
@@ -120,12 +171,8 @@ int lv_raid_dev_health(const struct logical_volume *lv, char **dev_health);
 int lv_raid_mismatch_count(const struct logical_volume *lv, uint64_t *cnt);
 int lv_raid_sync_action(const struct logical_volume *lv, char **sync_action);
 int lv_raid_message(const struct logical_volume *lv, const char *msg);
-int lv_cache_block_info(struct logical_volume *lv,
-			uint32_t *chunk_size, uint64_t *dirty_count,
-			uint64_t *used_count, uint64_t *total_count);
-int lv_cache_policy_info(struct logical_volume *lv,
-			 const char **policy_name, int *policy_argc,
-			 const char ***policy_argv);
+int lv_cache_status(const struct logical_volume *lv,
+		    struct lv_status_cache **status);
 int lv_thin_pool_percent(const struct logical_volume *lv, int metadata,
 			 dm_percent_t *percent);
 int lv_thin_percent(const struct logical_volume *lv, int mapped,
@@ -147,18 +194,18 @@ int lv_is_active_exclusive(const struct logical_volume *lv);
 int lv_is_active_exclusive_locally(const struct logical_volume *lv);
 int lv_is_active_exclusive_remotely(const struct logical_volume *lv);

-int lv_has_target_type(struct dm_pool *mem, struct logical_volume *lv,
+int lv_has_target_type(struct dm_pool *mem, const struct logical_volume *lv,
 		       const char *layer, const char *target_type);

-int monitor_dev_for_events(struct cmd_context *cmd, struct logical_volume *lv,
+int monitor_dev_for_events(struct cmd_context *cmd, const struct logical_volume *lv,
 			   const struct lv_activate_opts *laopts, int do_reg);

 #ifdef DMEVENTD
 #  include "libdevmapper-event.h"
 char *get_monitor_dso_path(struct cmd_context *cmd, const char *libpath);
 int target_registered_with_dmeventd(struct cmd_context *cmd, const char *libpath,
-				    struct logical_volume *lv, int *pending);
-int target_register_events(struct cmd_context *cmd, const char *dso, struct logical_volume *lv,
+				    const struct logical_volume *lv, int *pending);
+int target_register_events(struct cmd_context *cmd, const char *dso, const struct logical_volume *lv,
 			    int evmask __attribute__((unused)), int set, int timeout);
 #endif

@@ -172,18 +219,19 @@ int add_linear_area_to_dtree(struct dm_tree_node *node, uint64_t size,
 int pv_uses_vg(struct physical_volume *pv,
 	       struct volume_group *vg);

+struct dev_usable_check_params {
+	unsigned int check_empty:1;
+	unsigned int check_blocked:1;
+	unsigned int check_suspended:1;
+	unsigned int check_error_target:1;
+	unsigned int check_reserved:1;
+};
+
 /*
 * Returns 1 if mapped device is not suspended, blocked or
 * is using a reserved name.
 */
-int device_is_usable(struct device *dev);
-
-/*
- * Returns 1 if the device is suspended or blocking.
- * (Does not perform check on the LV name of the device.)
- * N.B.  This is !device_is_usable() without the name check.
- */
-int device_is_suspended_or_blocking(struct device *dev);
+int device_is_usable(struct device *dev, struct dev_usable_check_params check);

 /*
 * Declaration moved here from fs.h to keep header fs.h hidden
--- a/lib/activate/dev_manager.c
+++ b/lib/activate/dev_manager.c
--- a/lib/activate/dev_manager.h
+++ b/lib/activate/dev_manager.h
@@ -25,8 +25,9 @@ struct cmd_context;
 struct dev_manager;
 struct dm_info;
 struct device;
+struct lv_seg_status;

-int read_only_lv(struct logical_volume *lv, struct lv_activate_opts *laopts);
+int read_only_lv(const struct logical_volume *lv, const struct lv_activate_opts *laopts);

 /*
 * Constructor and destructor.
@@ -47,7 +48,8 @@ void dev_manager_exit(void);
 int dev_manager_info(struct dm_pool *mem, const struct logical_volume *lv,
 		     const char *layer,
 		     int with_open_count, int with_read_ahead,
-		     struct dm_info *info, uint32_t *read_ahead);
+		     struct dm_info *dminfo, uint32_t *read_ahead,
+		     struct lv_seg_status *seg_status);
 int dev_manager_snapshot_percent(struct dev_manager *dm,
 				 const struct logical_volume *lv,
 				 dm_percent_t *percent);
@@ -62,7 +64,7 @@ int dev_manager_raid_message(struct dev_manager *dm,
 			     const char *msg);
 int dev_manager_cache_status(struct dev_manager *dm,
 			     const struct logical_volume *lv,
-			     struct dm_status_cache **status);
+			     struct lv_status_cache **status);
 int dev_manager_thin_pool_status(struct dev_manager *dm,
 				 const struct logical_volume *lv,
 				 struct dm_status_thin_pool **status,
@@ -76,14 +78,14 @@ int dev_manager_thin_percent(struct dev_manager *dm,
 int dev_manager_thin_device_id(struct dev_manager *dm,
 			       const struct logical_volume *lv,
 			       uint32_t *device_id);
-int dev_manager_suspend(struct dev_manager *dm, struct logical_volume *lv,
+int dev_manager_suspend(struct dev_manager *dm, const struct logical_volume *lv,
 			struct lv_activate_opts *laopts, int lockfs, int flush_required);
-int dev_manager_activate(struct dev_manager *dm, struct logical_volume *lv,
+int dev_manager_activate(struct dev_manager *dm, const struct logical_volume *lv,
 			 struct lv_activate_opts *laopts);
-int dev_manager_preload(struct dev_manager *dm, struct logical_volume *lv,
+int dev_manager_preload(struct dev_manager *dm, const struct logical_volume *lv,
 			struct lv_activate_opts *laopts, int *flush_required);
-int dev_manager_deactivate(struct dev_manager *dm, struct logical_volume *lv);
-int dev_manager_transient(struct dev_manager *dm, struct logical_volume *lv) __attribute__((nonnull(1, 2)));
+int dev_manager_deactivate(struct dev_manager *dm, const struct logical_volume *lv);
+int dev_manager_transient(struct dev_manager *dm, const struct logical_volume *lv) __attribute__((nonnull(1, 2)));

 int dev_manager_mknodes(const struct logical_volume *lv);

--- a/lib/activate/fs.c
+++ b/lib/activate/fs.c
@@ -468,8 +468,8 @@ int fs_del_lv_byname(const char *dev_dir, const char *vg_name,
 	return _fs_op(FS_DEL, dev_dir, vg_name, lv_name, "", "", check_udev);
 }

-int fs_rename_lv(struct logical_volume *lv, const char *dev, 
-		const char *old_vgname, const char *old_lvname)
+int fs_rename_lv(const struct logical_volume *lv, const char *dev,
+		 const char *old_vgname, const char *old_lvname)
 {
 	if (strcmp(old_vgname, lv->vg->name)) {
 		return
--- a/lib/activate/fs.h
+++ b/lib/activate/fs.h
@@ -27,7 +27,7 @@ int fs_add_lv(const struct logical_volume *lv, const char *dev);
 int fs_del_lv(const struct logical_volume *lv);
 int fs_del_lv_byname(const char *dev_dir, const char *vg_name,
 		     const char *lv_name, int check_udev);
-int fs_rename_lv(struct logical_volume *lv, const char *dev, 
+int fs_rename_lv(const struct logical_volume *lv, const char *dev,
 		 const char *old_vgname, const char *old_lvname);
 /* void fs_unlock(void);  moved to activate.h */
 uint32_t fs_get_cookie(void);
--- a/lib/cache/lvmcache.c
+++ b/lib/cache/lvmcache.c
@@ -56,6 +56,9 @@ struct lvmcache_vginfo {
 	char _padding[7];
 	struct lvmcache_vginfo *next; /* Another VG with same name? */
 	char *creation_host;
+	char *lock_type;
+	uint32_t mda_checksum;
+	size_t mda_size;
 	size_t vgmetadata_size;
 	char *vgmetadata;	/* Copy of VG metadata as format_text string */
 	struct dm_config_tree *cft; /* Config tree created from vgmetadata */
@@ -65,6 +68,7 @@ struct lvmcache_vginfo {
 	unsigned vg_use_count;	/* Counter of vg reusage */
 	unsigned precommitted;	/* Is vgmetadata live or precommitted? */
 	unsigned cached_vg_invalidated;	/* Signal to regenerate cached_vg */
+	unsigned preferred_duplicates; /* preferred duplicate pvs have been set */
 };

 static struct dm_hash_table *_pvid_hash = NULL;
@@ -76,6 +80,7 @@ static int _scanning_in_progress = 0;
 static int _has_scanned = 0;
 static int _vgs_locked = 0;
 static int _vg_global_lock_held = 0;	/* Global lock held when cache wiped? */
+static int _found_duplicate_pvs = 0;	/* If we never see a duplicate PV we can skip checking for them later. */

 int lvmcache_init(void)
 {
@@ -112,6 +117,47 @@ int lvmcache_init(void)
 	return 1;
 }

+/*
+ * Once PV info has been populated in lvmcache and
+ * lvmcache has chosen preferred duplicate devices,
+ * set this flag so that lvmcache will not try to
+ * compare and choose preferred duplicate devices
+ * again (which may result in different preferred
+ * devices.)  PV info can be populated in lvmcache
+ * multiple times, each time causing lvmcache to
+ * compare the duplicate devices, so we need to
+ * record that the comparison/preferences have
+ * already been done, so the preferences from the
+ * first time through are not changed.
+ *
+ * This is something of a hack to work around the
+ * fact that the code isn't really designed to
+ * handle duplicate PVs, and the fact that lvmetad
+ * has its own way of picking a preferred duplicate
+ * and lvmcache has another way based on having
+ * more information than lvmetad does.
+ *
+ * If we come up with a better overall method to
+ * handle duplicate PVs, then this can probably be
+ * removed.
+ *
+ * FIXME: if we want to make lvmetad work with clvmd,
+ * then this may need to be changed to set
+ * preferred_duplicates back to 0.
+ */
+
+void lvmcache_set_preferred_duplicates(const char *vgid)
+{
+	struct lvmcache_vginfo *found = lvmcache_vginfo_from_vgid(vgid);
+
+	/* No vginfo for this vgid: log the stack and leave the cache untouched. */
+	if (!found) {
+		stack;
+		return;
+	}
+	found->preferred_duplicates = 1;
+}
+
 void lvmcache_seed_infos_from_lvmetad(struct cmd_context *cmd)
 {
 	if (!lvmetad_active() || _has_scanned)
@@ -284,6 +330,9 @@ void lvmcache_commit_metadata(const char *vgname)

 void lvmcache_drop_metadata(const char *vgname, int drop_precommitted)
 {
+	if (lvmcache_vgname_is_locked(VG_GLOBAL) && !vg_write_lock_held())
+		return;
+
 	/* For VG_ORPHANS, we need to invalidate all labels on orphan PVs. */
 	if (!strcmp(vgname, VG_ORPHANS)) {
 		_drop_metadata(FMT_TEXT_ORPHAN_VG_NAME, 0);
@@ -292,7 +341,7 @@ void lvmcache_drop_metadata(const char *vgname, int drop_precommitted)

 		/* Indicate that PVs could now be missing from the cache */
 		init_full_scan_done(0);
-	} else if (!lvmcache_vgname_is_locked(VG_GLOBAL))
+	} else
 		_drop_metadata(vgname, drop_precommitted);
 }

@@ -367,10 +416,10 @@ void lvmcache_lock_vgname(const char *vgname, int read_only __attribute__((unuse
 	if (!dm_hash_insert(_lock_hash, vgname, (void *) 1))
 		log_error("Cache locking failure for %s", vgname);

-	_update_cache_lock_state(vgname, 1);
-
-	if (strcmp(vgname, VG_GLOBAL))
+	if (strcmp(vgname, VG_GLOBAL)) {
+		_update_cache_lock_state(vgname, 1);
 		_vgs_locked++;
+	}
 }

 int lvmcache_vgname_is_locked(const char *vgname)
@@ -387,7 +436,8 @@ void lvmcache_unlock_vgname(const char *vgname)
 		log_error(INTERNAL_ERROR "Attempt to unlock unlocked VG %s.",
 			  vgname);

-	_update_cache_lock_state(vgname, 0);
+	if (strcmp(vgname, VG_GLOBAL))
+		_update_cache_lock_state(vgname, 0);

 	dm_hash_remove(_lock_hash, vgname);

@@ -401,6 +451,16 @@ int lvmcache_vgs_locked(void)
 	return _vgs_locked;
 }

+/*
+ * Returns the _found_duplicate_pvs flag, which is set when lvmcache
+ * sees a duplicate PV.  process_each_pv() can avoid searching for
+ * duplicates by checking this and seeing that no duplicate PVs exist.
+ */
+int lvmcache_found_duplicate_pvs(void)
+{
+	return _found_duplicate_pvs;
+}
+
 static void _vginfo_attach_info(struct lvmcache_vginfo *vginfo,
 				struct lvmcache_info *info)
 {
@@ -693,10 +753,10 @@ int lvmcache_label_scan(struct cmd_context *cmd, int full_scan)
 		goto out;
 	}

-	if (full_scan == 2 && (cmd->filter && !cmd->filter->use_count) && !refresh_filters(cmd))
+	if (full_scan == 2 && (cmd->full_filter && !cmd->full_filter->use_count) && !refresh_filters(cmd))
 		goto_out;

-	if (!cmd->filter || !(iter = dev_iter_create(cmd->filter, (full_scan == 2) ? 1 : 0))) {
+	if (!cmd->full_filter || !(iter = dev_iter_create(cmd->full_filter, (full_scan == 2) ? 1 : 0))) {
 		log_error("dev_iter creation failed");
 		goto out;
 	}
@@ -719,8 +779,8 @@ int lvmcache_label_scan(struct cmd_context *cmd, int full_scan)
 	 * device cache for the benefit of short-lived processes.
 	 */
 	if (full_scan == 2 && cmd->is_long_lived &&
-	    cmd->dump_filter && cmd->filter && cmd->filter->dump &&
-	    !cmd->filter->dump(cmd->filter, 0))
+	    cmd->dump_filter && cmd->full_filter && cmd->full_filter->dump &&
+	    !cmd->full_filter->dump(cmd->full_filter, 0))
 		stack;

 	r = 1;
@@ -846,6 +906,37 @@ int lvmcache_vginfo_holders_dec_and_test_for_zero(struct lvmcache_vginfo *vginfo
 }
 // #endif

+int lvmcache_get_vgnameids(struct cmd_context *cmd, int include_internal,
+			   struct dm_list *vgnameids)
+{
+	struct lvmcache_vginfo *vginfo;
+	struct vgnameid_list *entry;
+
+	lvmcache_label_scan(cmd, 0);
+
+	dm_list_iterate_items(vginfo, &_vginfos) {
+		/* Orphan VGs are skipped unless include_internal is set. */
+		if (!include_internal && is_orphan_vg(vginfo->vgname))
+			continue;
+
+		entry = dm_pool_alloc(cmd->mem, sizeof(*entry));
+		if (!entry) {
+			log_error("vgnameid_list allocation failed.");
+			return 0;
+		}
+		entry->vgid = dm_pool_strdup(cmd->mem, vginfo->vgid);
+		entry->vg_name = dm_pool_strdup(cmd->mem, vginfo->vgname);
+		if (!(entry->vgid && entry->vg_name)) {
+			log_error("vgnameid_list member allocation failed.");
+			return 0;
+		}
+
+		dm_list_add(vgnameids, &entry->list);
+	}
+
+	return 1;
+}
+
 struct dm_list *lvmcache_get_vgids(struct cmd_context *cmd,
 				   int include_internal)
 {
@@ -1357,7 +1448,7 @@ static int _lvmcache_update_vgname(struct lvmcache_info *info,
 }

 static int _lvmcache_update_vgstatus(struct lvmcache_info *info, uint32_t vgstatus,
-				     const char *creation_host)
+				     const char *creation_host, const char *lock_type)
 {
 	if (!info || !info->vginfo)
 		return 1;
@@ -1370,11 +1461,11 @@ static int _lvmcache_update_vgstatus(struct lvmcache_info *info, uint32_t vgstat
 	info->vginfo->status = vgstatus;

 	if (!creation_host)
-		return 1;
+		goto set_lock_type;

 	if (info->vginfo->creation_host && !strcmp(creation_host,
 						   info->vginfo->creation_host))
-		return 1;
+		goto set_lock_type;

 	if (info->vginfo->creation_host)
 		dm_free(info->vginfo->creation_host);
@@ -1388,6 +1479,44 @@ static int _lvmcache_update_vgstatus(struct lvmcache_info *info, uint32_t vgstat
 	log_debug_cache("lvmcache: %s: VG %s: Set creation host to %s.",
 			dev_name(info->dev), info->vginfo->vgname, creation_host);

+set_lock_type:
+
+	if (!lock_type)
+		goto out;
+
+	if (info->vginfo->lock_type && !strcmp(lock_type, info->vginfo->lock_type))
+		goto out;
+
+	if (info->vginfo->lock_type)
+		dm_free(info->vginfo->lock_type);
+
+	if (!(info->vginfo->lock_type = dm_strdup(lock_type))) {
+		log_error("cache creation host alloc failed for %s",
+			  lock_type);
+		return 0;
+	}
+
+out:
+	return 1;
+}
+
+static int _lvmcache_update_vg_mda_info(struct lvmcache_info *info, uint32_t mda_checksum,
+					size_t mda_size)
+{
+	/* Nothing to record without a vginfo or when mda_size is 0. */
+	if (!info || !info->vginfo || !mda_size)
+		return 1;
+	/* Skip only when BOTH checksum and size already match the stored pair. */
+	if (info->vginfo->mda_checksum == mda_checksum && info->vginfo->mda_size == mda_size)
+		return 1;
+
+	info->vginfo->mda_checksum = mda_checksum;
+	info->vginfo->mda_size = mda_size;
+
+	/* FIXME Add checksum index */
+	log_debug_cache("lvmcache: %s: VG %s: Stored metadata checksum %" PRIu32 " with size %" PRIsize_t ".",
+			dev_name(info->dev), info->vginfo->vgname, mda_checksum, mda_size);
+
+	return 1;
+}

@@ -1401,10 +1530,11 @@ int lvmcache_add_orphan_vginfo(const char *vgname, struct format_type *fmt)
 	return _lvmcache_update_vgname(NULL, vgname, vgname, 0, "", fmt);
 }

-int lvmcache_update_vgname_and_id(struct lvmcache_info *info,
-				  const char *vgname, const char *vgid,
-				  uint32_t vgstatus, const char *creation_host)
+int lvmcache_update_vgname_and_id(struct lvmcache_info *info, struct lvmcache_vgsummary *vgsummary)
 {
+	const char *vgname = vgsummary->vgname;
+	const char *vgid = (char *)&vgsummary->vgid;
+
 	if (!vgname && !info->vginfo) {
 		log_error(INTERNAL_ERROR "NULL vgname handed to cache");
 		/* FIXME Remove this */
@@ -1432,10 +1562,11 @@ int lvmcache_update_vgname_and_id(struct lvmcache_info *info,
 	if (!is_orphan_vg(vgname))
 		info->status &= ~CACHE_INVALID;

-	if (!_lvmcache_update_vgname(info, vgname, vgid, vgstatus,
-				     creation_host, info->fmt) ||
+	if (!_lvmcache_update_vgname(info, vgname, vgid, vgsummary->vgstatus,
+				     vgsummary->creation_host, info->fmt) ||
 	    !_lvmcache_update_vgid(info, info->vginfo, vgid) ||
-	    !_lvmcache_update_vgstatus(info, vgstatus, creation_host))
+	    !_lvmcache_update_vgstatus(info, vgsummary->vgstatus, vgsummary->creation_host, vgsummary->lock_type) ||
+	    !_lvmcache_update_vg_mda_info(info, vgsummary->mda_checksum, vgsummary->mda_size))
 		return_0;

 	return 1;
@@ -1446,6 +1577,12 @@ int lvmcache_update_vg(struct volume_group *vg, unsigned precommitted)
 	struct pv_list *pvl;
 	struct lvmcache_info *info;
 	char pvid_s[ID_LEN + 1] __attribute__((aligned(8)));
+	struct lvmcache_vgsummary vgsummary = {
+		.vgname = vg->name,
+		.vgstatus = vg->status,
+		.vgid = vg->id,
+		.lock_type = vg->lock_type
+	};

 	pvid_s[sizeof(pvid_s) - 1] = '\0';

@@ -1453,9 +1590,7 @@ int lvmcache_update_vg(struct volume_group *vg, unsigned precommitted)
 		strncpy(pvid_s, (char *) &pvl->pv->id, sizeof(pvid_s) - 1);
 		/* FIXME Could pvl->pv->dev->pvid ever be different? */
 		if ((info = lvmcache_info_from_pvid(pvid_s, 0)) &&
-		    !lvmcache_update_vgname_and_id(info, vg->name,
-						   (char *) &vg->id,
-						   vg->status, NULL))
+		    !lvmcache_update_vgname_and_id(info, &vgsummary))
 			return_0;
 	}

@@ -1466,6 +1601,85 @@ int lvmcache_update_vg(struct volume_group *vg, unsigned precommitted)
 	return 1;
 }

+/*
+ * Replace pv->dev with dev so that dev will appear for reporting.
+ */
+
+void lvmcache_replace_dev(struct cmd_context *cmd, struct physical_volume *pv,
+			  struct device *dev)
+{
+	struct lvmcache_info *info;
+	char pvid_s[ID_LEN + 1] __attribute__((aligned(8)));
+
+	strncpy(pvid_s, (char *) &pv->id, sizeof(pvid_s) - 1);
+	pvid_s[sizeof(pvid_s) - 1] = '\0';
+
+	if (!(info = lvmcache_info_from_pvid(pvid_s, 0)))
+		return;
+
+	info->dev = dev;
+	info->label->dev = dev;
+	pv->dev = dev;
+}
+
+/*
+ * We can see multiple different devices with the
+ * same pvid, i.e. duplicates.
+ *
+ * There may be different reasons for seeing two
+ * devices with the same pvid:
+ * - multipath showing two paths to the same thing
+ * - one device copied to another, e.g. with dd,
+ *   also referred to as cloned devices.
+ * - a "subsystem" taking a device and creating
+ *   another device of its own that represents the
+ *   underlying device it is using, e.g. using dm
+ *   to create an identity mapping of a PV.
+ *
+ * Given duplicate devices, we have to choose one
+ * of them to be the "preferred" dev, i.e. the one
+ * that will be referenced in lvmcache, by pv->dev.
+ * We can keep the existing dev, that's currently
+ * used in lvmcache, or we can replace the existing
+ * dev with the new duplicate.
+ *
+ * Regardless of which device is preferred, we need
+ * to print messages explaining which devices were
+ * found so that a user can sort out for themselves
+ * what has happened if the preferred device is not
+ * the one they are interested in.
+ *
+ * If a user wants to use the non-preferred device,
+ * they will need to filter out the device that
+ * lvm is preferring.
+ *
+ * The dev_subsystem calls check if the major number
+ * of the dev is part of a subsystem like DM/MD/DRBD.
+ * A dev that's part of a subsystem is preferred over a
+ * duplicate of that dev that is not part of a
+ * subsystem.
+ *
+ * The has_holders calls check if the device is being
+ * used by another, and prefer one that's not being used.
+ *
+ * FIXME: why do we prefer a device without holders
+ * over a device with holders?  We should understand
+ * the reason for that choice.
+ *
+ * FIXME: there may be other reasons to prefer one
+ * device over another:
+ *
+ * . are there other use/open counts we could check
+ *   beyond the holders?
+ *
+ * . check if either is bad/usable and prefer
+ *   the good one?
+ *
+ * . prefer the one with smaller minor number?
+ *   Might avoid disturbing things due to a new
+ *   transient duplicate?
+ */
+
 struct lvmcache_info *lvmcache_add(struct labeller *labeller, const char *pvid,
 				   struct device *dev,
 				   const char *vgname, const char *vgid,
@@ -1476,6 +1690,14 @@ struct lvmcache_info *lvmcache_add(struct labeller *labeller, const char *pvid,
 	struct label *label;
 	struct lvmcache_info *existing, *info;
 	char pvid_s[ID_LEN + 1] __attribute__((aligned(8)));
+	struct lvmcache_vgsummary vgsummary = {
+		.vgname = vgname,
+		.vgstatus = vgstatus,
+	};
+
+	/* N.B. vgid is not NUL-terminated when called from _text_pv_write */
+	if (vgid)
+		strncpy((char *)&vgsummary.vgid, vgid, sizeof(vgsummary.vgid));

 	if (!_vgname_hash && !lvmcache_init()) {
 		log_error("Internal cache initialisation failed");
@@ -1505,49 +1727,166 @@ struct lvmcache_info *lvmcache_add(struct labeller *labeller, const char *pvid,
 		lvmcache_del_bas(info);
 	} else {
 		if (existing->dev != dev) {
-			/* Is the existing entry a duplicate pvid e.g. md ? */
-			if (dev_subsystem_part_major(dt, existing->dev) &&
-			    !dev_subsystem_part_major(dt, dev)) {
-				log_very_verbose("Ignoring duplicate PV %s on "
-						 "%s - using %s %s",
-						 pvid, dev_name(dev),
-						 dev_subsystem_name(dt, existing->dev),
-						 dev_name(existing->dev));
+			int old_in_subsystem = 0;
+			int new_in_subsystem = 0;
+			int old_is_dm = 0;
+			int new_is_dm = 0;
+			int old_has_holders = 0;
+			int new_has_holders = 0;
+
+			/*
+			 * Here are different devices with the same pvid:
+			 * duplicates.  See comment above.
+			 */
+
+			/*
+			 * This flag tells the process_each_pv code to search
+			 * the devices list for duplicates, so that devices
+			 * can be processed together with their duplicates
+			 * (while processing the VG, rather than reporting
+			 * pv->dev under the VG, and its duplicate outside
+			 * the VG context.)
+			 */
+			_found_duplicate_pvs = 1;
+
+			/*
+			 * The new dev may not have pvid set.
+			 * The process_each_pv code needs to have the pvid
+			 * set in each device to detect that the devices
+			 * are duplicates.
+			 */
+			strncpy(dev->pvid, pvid_s, sizeof(dev->pvid));
+
+			/*
+			 * Now decide if we are going to ignore the new
+			 * device, or replace the existing/old device in
+			 * lvmcache with the new one.
+			 */
+			old_in_subsystem = dev_subsystem_part_major(dt, existing->dev);
+			new_in_subsystem = dev_subsystem_part_major(dt, dev);
+
+			old_is_dm = dm_is_dm_major(MAJOR(existing->dev->dev));
+			new_is_dm = dm_is_dm_major(MAJOR(dev->dev));
+
+			old_has_holders = dm_device_has_holders(MAJOR(existing->dev->dev), MINOR(existing->dev->dev));
+			new_has_holders = dm_device_has_holders(MAJOR(dev->dev), MINOR(dev->dev));
+
+			if (old_has_holders && new_has_holders) {
+				/*
+				 * This is not a selection of old or new, but
+				 * just a warning to be aware of.
+				 */
+				log_warn("WARNING: duplicate PV %s is being used from both devices %s and %s",
+					 pvid_s,
+					 dev_name(existing->dev),
+					 dev_name(dev));
+			}
+
+			if (existing->vginfo->preferred_duplicates) {
+				/*
+				 * The preferred duplicate devs have already
+				 * been chosen during a previous populating of
+				 * lvmcache, so just use the existing preferences.
+				 */
+				log_verbose("Found duplicate PV %s: using existing dev %s",
+					    pvid_s,
+					    dev_name(existing->dev));
 				return NULL;
-			} else if (dm_is_dm_major(MAJOR(existing->dev->dev)) &&
-				   !dm_is_dm_major(MAJOR(dev->dev))) {
-				log_very_verbose("Ignoring duplicate PV %s on "
-						 "%s - using dm %s",
-						 pvid, dev_name(dev),
-						 dev_name(existing->dev));
+			}
+
+			if (old_in_subsystem && !new_in_subsystem) {
+				/* Use old, ignore new. */
+				log_warn("Found duplicate PV %s: using %s not %s",
+					 pvid_s,
+					 dev_name(existing->dev),
+					 dev_name(dev));
+				log_warn("Using duplicate PV %s from subsystem %s, ignoring %s",
+					 dev_name(existing->dev),
+					 dev_subsystem_name(dt, existing->dev),
+					 dev_name(dev));
 				return NULL;
-			} else if (!dev_subsystem_part_major(dt, existing->dev) &&
-				   dev_subsystem_part_major(dt, dev))
-				log_very_verbose("Duplicate PV %s on %s - "
-						 "using %s %s", pvid,
-						 dev_name(existing->dev),
-						 dev_subsystem_name(dt, existing->dev),
-						 dev_name(dev));
-			else if (!dm_is_dm_major(MAJOR(existing->dev->dev)) &&
-				 dm_is_dm_major(MAJOR(dev->dev)))
-				log_very_verbose("Duplicate PV %s on %s - "
-						 "using dm %s", pvid,
-						 dev_name(existing->dev),
-						 dev_name(dev));
-			/* FIXME If both dm, check dependencies */
-			//else if (dm_is_dm_major(MAJOR(existing->dev->dev)) &&
-				 //dm_is_dm_major(MAJOR(dev->dev)))
-				 //
-			else if (!strcmp(pvid_s, existing->dev->pvid)) 
-				log_error("Found duplicate PV %s: using %s not "
-					  "%s", pvid, dev_name(dev),
-					  dev_name(existing->dev));
+
+			} else if (!old_in_subsystem && new_in_subsystem) {
+				/* Use new, replace old. */
+				log_warn("Found duplicate PV %s: using %s not %s",
+					 pvid_s,
+					 dev_name(dev),
+					 dev_name(existing->dev));
+				log_warn("Using duplicate PV %s from subsystem %s, replacing %s",
+					 dev_name(dev),
+					 dev_subsystem_name(dt, dev),
+					 dev_name(existing->dev));
+
+			} else if (old_has_holders && !new_has_holders) {
+				/* Use new, replace old. */
+				/* FIXME: why choose the one without holders? */
+				log_warn("Found duplicate PV %s: using %s not %s",
+					 pvid_s,
+					 dev_name(dev),
+					 dev_name(existing->dev));
+				log_warn("Using duplicate PV %s without holders, replacing %s",
+					 dev_name(dev),
+					 dev_name(existing->dev));
+
+			} else if (!old_has_holders && new_has_holders) {
+				/* Use old, ignore new. */
+				log_warn("Found duplicate PV %s: using %s not %s",
+					 pvid_s,
+					 dev_name(existing->dev),
+					 dev_name(dev));
+				log_warn("Using duplicate PV %s without holders, ignoring %s",
+					 dev_name(existing->dev),
+					 dev_name(dev));
+				return NULL;
+
+			} else if (old_is_dm && new_is_dm) {
+				/* Use new, replace old. */
+				/* FIXME: why choose the new instead of the old? */
+				log_warn("Found duplicate PV %s: using %s not %s",
+					 pvid_s,
+					 dev_name(dev),
+					 dev_name(existing->dev));
+				log_warn("Using duplicate PV %s which is last seen, replacing %s",
+					 dev_name(dev),
+					 dev_name(existing->dev));
+
+			} else if (!strcmp(pvid_s, existing->dev->pvid)) {
+				/* No criteria to use for preferring old or new. */
+				/* FIXME: why choose the new instead of the old? */
+				/* FIXME: a transient duplicate would be a reason
+				 * to select the old instead of the new. */
+				log_warn("Found duplicate PV %s: using %s not %s",
+					 pvid_s,
+					 dev_name(dev),
+					 dev_name(existing->dev));
+				log_warn("Using duplicate PV %s which is last seen, replacing %s",
+					 dev_name(dev),
+					 dev_name(existing->dev));
+			}
+		} else {
+			/*
+			 * The new dev is the same as the existing dev.
+			 *
+			 * FIXME: Why can't we just return NULL here if the
+			 * device already exists?  Things don't seem to work
+			 * if we do that for some reason.
+			 */
+			log_verbose("Found same device %s with same pvid %s",
+				    dev_name(existing->dev), pvid_s);
 		}
-		if (strcmp(pvid_s, existing->dev->pvid)) 
-			log_debug_cache("Updating pvid cache to %s (%s) from %s (%s)",
-					pvid_s, dev_name(dev),
-					existing->dev->pvid, dev_name(existing->dev));
-		/* Switch over to new preferred device */
+
+		/*
+		 * This happens when running pvcreate on an existing PV.
+		 */
+		if (strcmp(pvid_s, existing->dev->pvid))  {
+			log_verbose("Replacing dev %s pvid %s with dev %s pvid %s",
+				    dev_name(existing->dev), existing->dev->pvid,
+				    dev_name(dev), pvid_s);
+		}
+
+		/*
+		 * Switch over to new preferred device.
+		 */
 		existing->dev = dev;
 		info = existing;
 		/* Has labeller changed? */
@@ -1572,7 +1911,7 @@ struct lvmcache_info *lvmcache_add(struct labeller *labeller, const char *pvid,
 		return NULL;
 	}

-	if (!lvmcache_update_vgname_and_id(info, vgname, vgid, vgstatus, NULL)) {
+	if (!lvmcache_update_vgname_and_id(info, &vgsummary)) {
 		if (!existing) {
 			dm_hash_remove(_pvid_hash, pvid_s);
 			strcpy(info->dev->pvid, "");
@@ -1981,3 +2320,41 @@ uint64_t lvmcache_smallest_mda_size(struct lvmcache_info *info)
 const struct format_type *lvmcache_fmt(struct lvmcache_info *info) {
 	return info->fmt;
 }
+
+int lvmcache_lookup_mda(struct lvmcache_vgsummary *vgsummary)
+{
+	struct lvmcache_vginfo *vginfo;
+
+	if (!vgsummary->mda_size)
+		return 0;
+
+	/* FIXME Index the checksums */
+	dm_list_iterate_items(vginfo, &_vginfos) {
+		if (vgsummary->mda_checksum == vginfo->mda_checksum &&
+		    vgsummary->mda_size == vginfo->mda_size &&
+		    !is_orphan_vg(vginfo->vgname)) {
+			vgsummary->vgname = vginfo->vgname;
+			vgsummary->creation_host = vginfo->creation_host;
+			vgsummary->vgstatus = vginfo->status;
+			/* vginfo->vgid has 1 more byte than vgsummary->vgid */
+			memcpy(&vgsummary->vgid, vginfo->vgid, sizeof(vgsummary->vgid));
+
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+int lvmcache_contains_lock_type_sanlock(struct cmd_context *cmd)
+{
+	struct lvmcache_vginfo *vginfo;
+
+	dm_list_iterate_items(vginfo, &_vginfos) {
+		if (vginfo->lock_type && !strcmp(vginfo->lock_type, "sanlock"))
+			return 1;
+	}
+
+	return 0;
+}
+
--- a/lib/cache/lvmcache.h
+++ b/lib/cache/lvmcache.h
@@ -39,6 +39,27 @@ struct disk_locn;

 struct lvmcache_vginfo;

+/*
+ * vgsummary represents a summary of the VG that is read
+ * without a lock.  The info does not come through vg_read(),
+ * but through reading mdas.  It provides information about
+ * the VG that is needed to lock the VG and then read it fully
+ * with vg_read(), after which the VG summary should be checked
+ * against the full VG metadata to verify it was correct (since
+ * it was read without a lock.)
+ *
+ * Once read, vgsummary information is saved in lvmcache_vginfo.
+ */
+struct lvmcache_vgsummary {
+	const char *vgname;
+	struct id vgid;
+	uint64_t vgstatus;
+	char *creation_host;
+	const char *lock_type;
+	uint32_t mda_checksum;
+	size_t mda_size;
+};
+
 int lvmcache_init(void);
 void lvmcache_allow_reads_with_lvmetad(void);

@@ -58,8 +79,7 @@ void lvmcache_del(struct lvmcache_info *info);

 /* Update things */
 int lvmcache_update_vgname_and_id(struct lvmcache_info *info,
-				  const char *vgname, const char *vgid,
-				  uint32_t vgstatus, const char *hostname);
+				  struct lvmcache_vgsummary *vgsummary);
 int lvmcache_update_vg(struct volume_group *vg, unsigned precommitted);

 void lvmcache_lock_vgname(const char *vgname, int read_only);
@@ -68,6 +88,7 @@ int lvmcache_verify_lock_order(const char *vgname);

 /* Queries */
 const struct format_type *lvmcache_fmt_from_vgname(struct cmd_context *cmd, const char *vgname, const char *vgid, unsigned revalidate_labels);
+int lvmcache_lookup_mda(struct lvmcache_vgsummary *vgsummary);

 /* Decrement and test if there are still vg holders in vginfo. */
 int lvmcache_vginfo_holders_dec_and_test_for_zero(struct lvmcache_vginfo *vginfo);
@@ -98,6 +119,9 @@ struct dm_list *lvmcache_get_vgnames(struct cmd_context *cmd,
 struct dm_list *lvmcache_get_vgids(struct cmd_context *cmd,
 				   int include_internal);

+int lvmcache_get_vgnameids(struct cmd_context *cmd, int include_internal,
+                          struct dm_list *vgnameids);
+
 /* Returns list of struct dm_str_list containing pool-allocated copy of pvids */
 struct dm_list *lvmcache_get_pvids(struct cmd_context *cmd, const char *vgname,
 				const char *vgid);
@@ -157,4 +181,13 @@ unsigned lvmcache_mda_count(struct lvmcache_info *info);
 int lvmcache_vgid_is_cached(const char *vgid);
 uint64_t lvmcache_smallest_mda_size(struct lvmcache_info *info);

+void lvmcache_replace_dev(struct cmd_context *cmd, struct physical_volume *pv,
+			struct device *dev);
+
+int lvmcache_found_duplicate_pvs(void);
+
+void lvmcache_set_preferred_duplicates(const char *vgid);
+
+int lvmcache_contains_lock_type_sanlock(struct cmd_context *cmd);
+
 #endif
--- a/lib/cache/lvmetad.c
+++ b/lib/cache/lvmetad.c
@@ -22,6 +22,7 @@
 #include "format-text.h" // TODO for disk_locn, used as a DA representation
 #include "crc.h"
 #include "lvm-signal.h"
+#include "lvmlockd.h"

 #define SCAN_TIMEOUT_SECONDS	80
 #define MAX_RESCANS		10	/* Maximum number of times to scan all PVs and retry if the daemon returns a token mismatch error */
@@ -34,12 +35,13 @@ static char *_lvmetad_token = NULL;
 static const char *_lvmetad_socket = NULL;
 static struct cmd_context *_lvmetad_cmd = NULL;

+static struct volume_group *lvmetad_pvscan_vg(struct cmd_context *cmd, struct volume_group *vg);
+
 void lvmetad_disconnect(void)
 {
 	if (_lvmetad_connected)
 		daemon_close(_lvmetad);
 	_lvmetad_connected = 0;
-	_lvmetad_cmd = NULL;
 }

 void lvmetad_init(struct cmd_context *cmd)
@@ -47,6 +49,10 @@ void lvmetad_init(struct cmd_context *cmd)
 	if (!_lvmetad_use && !access(getenv("LVM_LVMETAD_PIDFILE") ? : LVMETAD_PIDFILE, F_OK))
 		log_warn("WARNING: lvmetad is running but disabled."
 			 " Restart lvmetad before enabling it!");
+
+	if (_lvmetad_connected)
+		log_debug(INTERNAL_ERROR "Refreshing lvmetad global handle while connection with the daemon is active");
+
 	_lvmetad_cmd = cmd;
 }

@@ -98,11 +104,13 @@ int lvmetad_active(void)
 	return _lvmetad_connected;
 }

-void lvmetad_set_active(int active)
+void lvmetad_set_active(struct cmd_context *cmd, int active)
 {
 	_lvmetad_use = active;
 	if (!active && lvmetad_active())
 		lvmetad_disconnect();
+	if (cmd && !refresh_filters(cmd))
+		stack;
 }

 /*
@@ -134,10 +142,13 @@ void lvmetad_set_socket(const char *sock)
 	_lvmetad_socket = sock;
 }

+static int _lvmetad_pvscan_all_devs(struct cmd_context *cmd, activation_handler handler,
+				    int ignore_obsolete);
+
 static daemon_reply _lvmetad_send(const char *id, ...)
 {
 	va_list ap;
-	daemon_reply repl;
+	daemon_reply repl = { 0 };
 	daemon_request req;
 	unsigned num_rescans = 0;
 	unsigned total_usecs_waited = 0;
@@ -147,8 +158,10 @@ static daemon_reply _lvmetad_send(const char *id, ...)
 retry:
 	req = daemon_request_make(id);

-	if (_lvmetad_token)
-		daemon_request_extend(req, "token = %s", _lvmetad_token, NULL);
+	if (_lvmetad_token && !daemon_request_extend(req, "token = %s", _lvmetad_token, NULL)) {
+		repl.error = ENOMEM;
+		return repl;
+	}

 	va_start(ap, id);
 	daemon_request_extend_v(req, ap);
@@ -181,7 +194,7 @@ retry:
 				max_remaining_sleep_times--;	/* Sleep once before rescanning the first time, then 5 times each time after that. */
 		} else {
 			/* If the re-scan fails here, we try again later. */
-			(void) lvmetad_pvscan_all_devs(_lvmetad_cmd, NULL);
+			(void) _lvmetad_pvscan_all_devs(_lvmetad_cmd, NULL, 0);
 			num_rescans++;
 			max_remaining_sleep_times = 5;
 		}
@@ -260,19 +273,21 @@ static int _read_mda(struct lvmcache_info *info,
 	return 0;
 }

-static struct lvmcache_info *_pv_populate_lvmcache(struct cmd_context *cmd,
-						   struct dm_config_node *cn,
-						   dev_t fallback)
+static int _pv_populate_lvmcache(struct cmd_context *cmd,
+				 struct dm_config_node *cn,
+				 struct format_type *fmt, dev_t fallback)
 {
-	struct device *dev;
+	struct device *dev, *dev_alternate, *dev_alternate_cache = NULL;
+	struct label *label;
 	struct id pvid, vgid;
 	char mda_id[32];
 	char da_id[32];
 	int i = 0;
-	struct dm_config_node *mda = NULL;
-	struct dm_config_node *da = NULL;
+	struct dm_config_node *mda, *da;
+	struct dm_config_node *alt_devices = dm_config_find_node(cn->child, "devices_alternate");
+	struct dm_config_value *alt_device = NULL;
 	uint64_t offset, size;
-	struct lvmcache_info *info;
+	struct lvmcache_info *info, *info_alternate;
 	const char *pvid_txt = dm_config_find_str(cn->child, "id", NULL),
 		   *vgid_txt = dm_config_find_str(cn->child, "vgid", NULL),
 		   *vgname = dm_config_find_str(cn->child, "vgname", NULL),
@@ -281,11 +296,12 @@ static struct lvmcache_info *_pv_populate_lvmcache(struct cmd_context *cmd,
 	uint64_t devsize = dm_config_find_int64(cn->child, "dev_size", 0),
 		 label_sector = dm_config_find_int64(cn->child, "label_sector", 0);

-	struct format_type *fmt = fmt_name ? get_format_by_name(cmd, fmt_name) : NULL;
+	if (!fmt && fmt_name)
+		fmt = get_format_by_name(cmd, fmt_name);

 	if (!fmt) {
 		log_error("PV %s not recognised. Is the device missing?", pvid_txt);
-		return NULL;
+		return 0;
 	}

 	dev = dev_cache_get_by_devt(devt, cmd->filter);
@@ -293,18 +309,18 @@ static struct lvmcache_info *_pv_populate_lvmcache(struct cmd_context *cmd,
 		dev = dev_cache_get_by_devt(fallback, cmd->filter);

 	if (!dev) {
-		log_error("No device found for PV %s.", pvid_txt);
-		return NULL;
+		log_warn("WARNING: Device for PV %s not found or rejected by a filter.", pvid_txt);
+		return 0;
 	}

 	if (!pvid_txt || !id_read_format(&pvid, pvid_txt)) {
 		log_error("Missing or ill-formatted PVID for PV: %s.", pvid_txt);
-		return NULL;
+		return 0;
 	}

 	if (vgid_txt) {
 		if (!id_read_format(&vgid, vgid_txt))
-			return_NULL;
+			return_0;
 	} else
 		strcpy((char*)&vgid, fmt->orphan_vg_name);

@@ -313,7 +329,7 @@ static struct lvmcache_info *_pv_populate_lvmcache(struct cmd_context *cmd,

 	if (!(info = lvmcache_add(fmt->labeller, (const char *)&pvid, dev,
 				  vgname, (const char *)&vgid, 0)))
-		return_NULL;
+		return_0;

 	lvmcache_get_label(info)->sector = label_sector;
 	lvmcache_get_label(info)->dev = dev;
@@ -354,12 +370,59 @@ static struct lvmcache_info *_pv_populate_lvmcache(struct cmd_context *cmd,
 		++i;
 	} while (da);

-	return info;
+	if (alt_devices)
+		alt_device = alt_devices->v;
+
+	while (alt_device) {
+		dev_alternate = dev_cache_get_by_devt(alt_device->v.i, cmd->filter);
+		if (dev_alternate) {
+			if ((info_alternate = lvmcache_add(fmt->labeller, (const char *)&pvid, dev_alternate,
+							   vgname, (const char *)&vgid, 0))) {
+				dev_alternate_cache = dev_alternate;
+				info = info_alternate;
+				lvmcache_get_label(info)->dev = dev_alternate;
+			}
+		} else {
+			log_warn("Duplicate of PV %s dev %s exists on unknown device %"PRId64 ":%" PRId64,
+				 pvid_txt, dev_name(dev), MAJOR(alt_device->v.i), MINOR(alt_device->v.i));
+		}
+		alt_device = alt_device->next;
+	}
+
+	/*
+	 * Update lvmcache with the info about the alternate device by
+	 * reading its label, which should update lvmcache.
+	 */
+	if (dev_alternate_cache) {
+		if (!label_read(dev_alternate_cache, &label, 0)) {
+			log_warn("No PV label found on duplicate device %s.", dev_name(dev_alternate_cache));
+		}
+	}
+
+	lvmcache_set_preferred_duplicates((const char *)&vgid);
+	return 1;
+}
+
+static int _pv_update_struct_pv(struct physical_volume *pv, struct format_instance *fid)
+{
+	struct lvmcache_info *info;
+	if ((info = lvmcache_info_from_pvid((const char *)&pv->id, 0))) {
+		pv->label_sector = lvmcache_get_label(info)->sector;
+		pv->dev = lvmcache_device(info);
+		if (!pv->dev)
+			pv->status |= MISSING_PV;
+		if (!lvmcache_fid_add_mdas_pv(info, fid))
+			return_0;
+                pv->fid = fid;
+	} else
+		pv->status |= MISSING_PV; /* probably missing */
+	return 1;
 }

 struct volume_group *lvmetad_vg_lookup(struct cmd_context *cmd, const char *vgname, const char *vgid)
 {
 	struct volume_group *vg = NULL;
+	struct volume_group *vg2 = NULL;
 	daemon_reply reply;
 	int found;
 	char uuid[64];
@@ -371,7 +434,6 @@ struct volume_group *lvmetad_vg_lookup(struct cmd_context *cmd, const char *vgna
 	struct format_type *fmt;
 	struct dm_config_node *pvcn;
 	struct pv_list *pvl;
-	struct lvmcache_info *info;

 	if (!lvmetad_active())
 		return NULL;
@@ -420,24 +482,40 @@ struct volume_group *lvmetad_vg_lookup(struct cmd_context *cmd, const char *vgna

 		if ((pvcn = dm_config_find_node(top, "metadata/physical_volumes")))
 			for (pvcn = pvcn->child; pvcn; pvcn = pvcn->sib)
-				_pv_populate_lvmcache(cmd, pvcn, 0);
+				_pv_populate_lvmcache(cmd, pvcn, fmt, 0);
+
+		if ((pvcn = dm_config_find_node(top, "metadata/outdated_pvs")))
+			for (pvcn = pvcn->child; pvcn; pvcn = pvcn->sib)
+				_pv_populate_lvmcache(cmd, pvcn, fmt, 0);

 		top->key = name;
-		if (!(vg = import_vg_from_config_tree(reply.cft, fid)))
+		if (!(vg = import_vg_from_lvmetad_config_tree(reply.cft, fid)))
 			goto_out;

+		/* Locking may have detected a newer VG version and invalidated the
+		 * cached VG, so re-read it from disk.  The rescan can fail (NULL). */
+		if (dm_config_find_node(reply.cft->root, "vg_invalid")) {
+			log_debug_lvmetad("Update invalid lvmetad cache for VG %s", vgname);
+			vg2 = lvmetad_pvscan_vg(cmd, vg);
+			release_vg(vg);
+			fid = NULL;	/* NOTE(review): assumes fid went away with the released VG - confirm */
+			if (!(vg = vg2))
+				goto_out;
+			fid = vg->fid;
+		}
+
 		dm_list_iterate_items(pvl, &vg->pvs) {
-			if ((info = lvmcache_info_from_pvid((const char *)&pvl->pv->id, 0))) {
-				pvl->pv->label_sector = lvmcache_get_label(info)->sector;
-				pvl->pv->dev = lvmcache_device(info);
-				if (!pvl->pv->dev)
-					pvl->pv->status |= MISSING_PV;
-				if (!lvmcache_fid_add_mdas_pv(info, fid)) {
-					vg = NULL;
-					goto_out;	/* FIXME error path */
-				}
-			} else
-				pvl->pv->status |= MISSING_PV; /* probably missing */
+			if (!_pv_update_struct_pv(pvl->pv, fid)) {
+				vg = NULL;
+				goto_out;	/* FIXME error path */
+			}
+		}
+
+		dm_list_iterate_items(pvl, &vg->pvs_outdated) {
+			if (!_pv_update_struct_pv(pvl->pv, fid)) {
+				vg = NULL;
+				goto_out;	/* FIXME error path */
+			}
 		}

 		lvmcache_update_vg(vg, 0);
@@ -571,7 +649,7 @@ int lvmetad_pv_lookup(struct cmd_context *cmd, struct id pvid, int *found)

 	if (!(cn = dm_config_find_node(reply.cft->root, "physical_volume")))
 		goto_out;
-        else if (!_pv_populate_lvmcache(cmd, cn, 0))
+        else if (!_pv_populate_lvmcache(cmd, cn, NULL, 0))
 		goto_out;

 out_success:
@@ -601,7 +679,7 @@ int lvmetad_pv_lookup_by_dev(struct cmd_context *cmd, struct device *dev, int *f
 		goto out_success;

 	cn = dm_config_find_node(reply.cft->root, "physical_volume");
-	if (!cn || !_pv_populate_lvmcache(cmd, cn, dev->dev))
+	if (!cn || !_pv_populate_lvmcache(cmd, cn, NULL, dev->dev))
 		goto_out;

 out_success:
@@ -629,13 +707,63 @@ int lvmetad_pv_list_to_lvmcache(struct cmd_context *cmd)

 	if ((cn = dm_config_find_node(reply.cft->root, "physical_volumes")))
 		for (cn = cn->child; cn; cn = cn->sib)
-			_pv_populate_lvmcache(cmd, cn, 0);
+			_pv_populate_lvmcache(cmd, cn, NULL, 0);

 	daemon_reply_destroy(reply);

 	return 1;
 }

+/*
+ * Ask lvmetad for all known VGs and append one vgnameid_list entry per VG
+ * (allocated from cmd->mem) to vgnameids.  Returns 1 on success, 0 on error.
+ */
+int lvmetad_get_vgnameids(struct cmd_context *cmd, struct dm_list *vgnameids)
+{
+	struct vgnameid_list *vgnl;
+	struct id vgid;
+	const char *vg_name;
+	daemon_reply reply;
+	struct dm_config_node *cn;
+	int r = 0;
+
+	log_debug_lvmetad("Asking lvmetad for complete list of known VG ids/names");
+	reply = _lvmetad_send("vg_list", NULL);
+	if (!_lvmetad_handle_reply(reply, "list VGs", "", NULL))
+		goto_out;
+
+	if ((cn = dm_config_find_node(reply.cft->root, "volume_groups"))) {
+		for (cn = cn->child; cn; cn = cn->sib) {
+			if (!id_read_format(&vgid, cn->key)) {
+				stack;
+				continue;
+			}
+			if (!(vgnl = dm_pool_alloc(cmd->mem, sizeof(*vgnl)))) {
+				log_error("vgnameid_list allocation failed.");
+				goto out;
+			}
+			if (!(vg_name = dm_config_find_str(cn->child, "name", NULL))) {
+				log_error("vg_list no name found.");
+				goto out;
+			}
+			/* struct id is not NUL-terminated, so bound the copy. */
+			vgnl->vgid = dm_pool_strndup(cmd->mem, (const char *)&vgid, sizeof(vgid));
+			vgnl->vg_name = dm_pool_strdup(cmd->mem, vg_name);
+			if (!vgnl->vgid || !vgnl->vg_name) {
+				log_error("vgnameid_list member allocation failed.");
+				goto out;
+			}
+			dm_list_add(vgnameids, &vgnl->list);
+		}
+	}
+
+	r = 1;
+out:
+	/* Single exit so the daemon reply is released on every path. */
+	daemon_reply_destroy(reply);
+	return r;
+}
+
 int lvmetad_vg_list_to_lvmcache(struct cmd_context *cmd)
 {
 	struct volume_group *tmp;
@@ -824,6 +952,51 @@ int lvmetad_pv_found(const struct id *pvid, struct device *dev, const struct for
 	     daemon_reply_int(reply, "seqno_after", -1) != daemon_reply_int(reply, "seqno_before", -1)))
 		log_warn("WARNING: Inconsistent metadata found for VG %s", vg->name);

+	/*
+	 * pvscan --cache does not perform any lvmlockd locking, and
+	 * pvscan --cache -aay skips autoactivation in lockd VGs.
+	 *
+	 * pvscan --cache populates lvmetad with VG metadata from disk.
+	 * No lvmlockd locking is needed.  It is expected that lockd VG
+	 * metadata that is read by pvscan and populated in lvmetad may
+	 * be immediately stale due to changes to the VG from other hosts
+	 * during or after this pvscan.  This is normal and not a problem.
+	 * When a subsequent lvm command uses the VG, it will lock the VG
+	 * with lvmlockd, read the VG from lvmetad, and update the cached
+	 * copy from disk if necessary.
+	 *
+	 * pvscan --cache -aay does not activate LVs in lockd VGs because
+	 * activation requires locking, and a lock-start operation is needed
+	 * on a lockd VG before any locking can be performed in it.
+	 *
+	 * An equivalent of pvscan --cache -aay for lockd VGs is:
+	 * 1. pvscan --cache
+	 * 2. vgchange --lock-start
+	 * 3. vgchange -aay -S 'locktype=sanlock || locktype=dlm'
+	 *
+	 * [We could eventually add support for autoactivating lockd VGs
+	 * using pvscan by incorporating the lock start step (which can
+	 * take a long time), but there may be a better option than
+	 * continuing to overload pvscan.]
+	 * 
+	 * Stages of starting a lockd VG:
+	 *
+	 * . pvscan --cache populates lockd VGs in lvmetad without locks,
+	 *   and this initial cached copy may quickly become stale.
+	 *
+	 * . vgchange --lock-start VG reads the VG without the VG lock
+	 *   because no locks are available until the locking is started.
+	 *   It only uses the VG name and lock_type from the VG metadata,
+	 *   and then only uses it to start the VG lockspace in lvmlockd.
+	 *
+	 * . Further lvm commands, e.g. activation, can then lock the VG
+	 *   with lvmlockd and use current VG metadata.
+	 */
+	if (handler && vg && is_lockd_type(vg->lock_type)) {
+		log_debug_lvmetad("Skip pvscan activation for lockd type VG %s", vg->name);
+		handler = NULL;
+	}
+
 	if (result && handler) {
 		status = daemon_reply_str(reply, "status", "<missing>");
 		vgname = daemon_reply_str(reply, "vgname", "<missing>");
@@ -889,7 +1062,10 @@ struct _lvmetad_pvscan_baton {
 static int _lvmetad_pvscan_single(struct metadata_area *mda, void *baton)
 {
 	struct _lvmetad_pvscan_baton *b = baton;
-	struct volume_group *this = mda->ops->vg_read(b->fid, "", mda, 1);
+	struct volume_group *this;
+
+	if (!(this = mda_is_ignored(mda) ? NULL : mda->ops->vg_read(b->fid, "", mda, NULL, NULL, 1)))
+		return 1;

 	/* FIXME Also ensure contents match etc. */
 	if (!b->vg || this->seqno > b->vg->seqno)
@@ -900,8 +1076,102 @@ static int _lvmetad_pvscan_single(struct metadata_area *mda, void *baton)
 	return 1;
 }

+/*
+ * The lock manager may detect that the vg cached in lvmetad is out of date,
+ * due to something like an lvcreate from another host.
+ * This is limited to changes that only affect the vg (not global state like
+ * orphan PVs), so we only need to reread mdas on the vg's existing pvs.
+ * Returns the VG as re-read from disk, or NULL if it could not be re-read.
+ */
+static struct volume_group *lvmetad_pvscan_vg(struct cmd_context *cmd, struct volume_group *vg)
+{
+	struct volume_group *vg_ret = NULL;
+	struct dm_config_tree *vgmeta_ret = NULL;
+	struct dm_config_tree *vgmeta;
+	struct pv_list *pvl;
+	struct lvmcache_info *info = NULL;
+	struct format_instance *fid;
+	struct format_instance_ctx fic = { .type = 0 };
+	struct _lvmetad_pvscan_baton baton;
+
+	dm_list_iterate_items(pvl, &vg->pvs) {
+		/* missing pv */
+		if (!pvl->pv->dev)
+			continue;
+
+		if (!(info = lvmcache_info_from_pvid((const char *)&pvl->pv->id, 0))) {
+			log_error("Failed to find cached info for PV %s.", pv_dev_name(pvl->pv));
+			goto out;
+		}
+
+		baton.vg = NULL;
+		baton.fid = lvmcache_fmt(info)->ops->create_instance(lvmcache_fmt(info), &fic);
+		if (!baton.fid)
+			goto out;
+
+		if (baton.fid->fmt->features & FMT_OBSOLETE) {
+			log_error("WARNING: Ignoring obsolete format of metadata (%s) on device %s when using lvmetad",
+				  baton.fid->fmt->name, dev_name(pvl->pv->dev));
+			lvmcache_fmt(info)->ops->destroy_instance(baton.fid);
+			goto out;
+		}
+
+		lvmcache_foreach_mda(info, _lvmetad_pvscan_single, &baton);
+
+		if (!baton.vg) {
+			lvmcache_fmt(info)->ops->destroy_instance(baton.fid);
+			goto out;
+		}
+
+		if (!(vgmeta = export_vg_to_config_tree(baton.vg))) {
+			log_error("VG export to config tree failed");
+			release_vg(baton.vg);
+			goto out;
+		}
+
+		if (!vgmeta_ret) {
+			vgmeta_ret = vgmeta;
+		} else {
+			if (!compare_config(vgmeta_ret->root, vgmeta->root)) {
+				log_error("VG metadata comparison failed");
+				dm_config_destroy(vgmeta);
+				release_vg(baton.vg);
+				goto out;
+			}
+			dm_config_destroy(vgmeta);
+		}
+
+		release_vg(baton.vg);
+	}
+
+	if (vgmeta_ret) {
+		if (!(fid = lvmcache_fmt(info)->ops->create_instance(lvmcache_fmt(info), &fic)))
+			goto out;
+		if (!(vg_ret = import_vg_from_config_tree(vgmeta_ret, fid))) {
+			log_error("VG import from config tree failed");
+			lvmcache_fmt(info)->ops->destroy_instance(fid);
+			goto out;
+		}
+
+		/*
+		 * Update lvmetad with the newly read version of the VG.  The
+		 * "precommitted" name is a misnomer in this case, but that is the
+		 * field which lvmetad_vg_update() uses to send the cft to lvmetad.
+		 */
+		vg_ret->cft_precommitted = vgmeta_ret;
+		if (!lvmetad_vg_update(vg_ret))
+			log_error("Failed to update lvmetad with new VG meta");
+		vg_ret->cft_precommitted = NULL;
+	}
+out:
+	/* The exported config tree is scratch data: free it on every exit path. */
+	if (vgmeta_ret)
+		dm_config_destroy(vgmeta_ret);
+	return vg_ret;
+}
+
 int lvmetad_pvscan_single(struct cmd_context *cmd, struct device *dev,
-			  activation_handler handler)
+			  activation_handler handler, int ignore_obsolete)
 {
 	struct label *label;
 	struct lvmcache_info *info;
@@ -930,9 +1200,16 @@ int lvmetad_pvscan_single(struct cmd_context *cmd, struct device *dev,
 		goto_bad;

 	if (baton.fid->fmt->features & FMT_OBSOLETE) {
-		log_error("WARNING: Ignoring obsolete format of metadata (%s) on device %s when using lvmetad",
-			  baton.fid->fmt->name, dev_name(dev));
+		if (ignore_obsolete)
+			log_warn("WARNING: Ignoring obsolete format of metadata (%s) on device %s when using lvmetad",
+				  baton.fid->fmt->name, dev_name(dev));
+		else
+			log_error("WARNING: Ignoring obsolete format of metadata (%s) on device %s when using lvmetad",
+				  baton.fid->fmt->name, dev_name(dev));
 		lvmcache_fmt(info)->ops->destroy_instance(baton.fid);
+
+		if (ignore_obsolete)
+			return 1;
 		return 0;
 	}

@@ -945,7 +1222,7 @@ int lvmetad_pvscan_single(struct cmd_context *cmd, struct device *dev,
 	 * can scan further devices.
 	 */
 	if (!baton.vg && !(baton.fid->fmt->features & FMT_MDAS))
-		baton.vg = ((struct metadata_area *) dm_list_first(&baton.fid->metadata_areas_in_use))->ops->vg_read(baton.fid, lvmcache_vgname_from_info(info), NULL, 1);
+		baton.vg = ((struct metadata_area *) dm_list_first(&baton.fid->metadata_areas_in_use))->ops->vg_read(baton.fid, lvmcache_vgname_from_info(info), NULL, NULL, NULL, 1);

 	if (!baton.vg)
 		lvmcache_fmt(info)->ops->destroy_instance(baton.fid);
@@ -971,7 +1248,8 @@ bad:
 	return 0;
 }

-int lvmetad_pvscan_all_devs(struct cmd_context *cmd, activation_handler handler)
+static int _lvmetad_pvscan_all_devs(struct cmd_context *cmd, activation_handler handler,
+				    int ignore_obsolete)
 {
 	struct dev_iter *iter;
 	struct device *dev;
@@ -1013,7 +1291,7 @@ int lvmetad_pvscan_all_devs(struct cmd_context *cmd, activation_handler handler)
 			stack;
 			break;
 		}
-		if (!lvmetad_pvscan_single(cmd, dev, handler))
+		if (!lvmetad_pvscan_single(cmd, dev, handler, ignore_obsolete))
 			r = 0;
 	}

@@ -1028,3 +1306,340 @@ int lvmetad_pvscan_all_devs(struct cmd_context *cmd, activation_handler handler)
 	return r;
 }

+int lvmetad_pvscan_all_devs(struct cmd_context *cmd, activation_handler handler)
+{
+	return _lvmetad_pvscan_all_devs(cmd, handler, 0); /* ignore_obsolete = 0: an obsolete-format PV fails the scan */
+}
+
+/*
+ * FIXME Implement this function, skipping PVs known to belong to local or clustered,
+ * non-exported VGs.  For now it scans through the same helper as lvmetad_pvscan_all_devs().
+ */
+int lvmetad_pvscan_foreign_vgs(struct cmd_context *cmd, activation_handler handler)
+{
+	return _lvmetad_pvscan_all_devs(cmd, handler, 1); /* ignore_obsolete = 1: an obsolete-format PV only warns */
+}
+
+int lvmetad_vg_clear_outdated_pvs(struct volume_group *vg)
+{
+	daemon_reply rsp;
+	char vgid_txt[64];	/* text form of vg->id, as sent to lvmetad */
+	int ok;
+
+	if (!id_write_format(&vg->id, vgid_txt, sizeof(vgid_txt)))
+		return_0;
+
+	/* Send the request, keep the reply handler's verdict, then free the reply. */
+	rsp = _lvmetad_send("vg_clear_outdated_pvs", "vgid = %s", vgid_txt, NULL);
+	ok = _lvmetad_handle_reply(rsp, "clear the list of outdated PVs", vg->name, NULL);
+	daemon_reply_destroy(rsp);
+	return ok;
+}
+
+/*
+ * Records the state of cached PVs in lvmetad so we can look for changes
+ * after rescanning.
+ */
+struct pv_cache_list {
+	struct dm_list list;		/* links the entry into a before/after PV list */
+	dev_t devt;			/* "device" value reported by lvmetad for this PV */
+	struct id pvid;			/* parsed from the PV's key in the pv_list reply */
+	const char *vgid;		/* copy of the reply's "vgid"; NULL when none was reported */
+	unsigned found : 1;		/* set once paired with an entry of the other list */
+	unsigned update_udev : 1;	/* set when the device should be reopened to refresh udev */
+};
+
+/*
+ * Get the list of PVs known to lvmetad, adding one pv_cache_list entry per PV
+ * to pvc_list.  Returns 1 on success or when lvmetad is inactive, 0 on failure.
+ */
+static int _lvmetad_get_pv_cache_list(struct cmd_context *cmd, struct dm_list *pvc_list)
+{
+	daemon_reply reply;
+	struct dm_config_node *cn;
+	struct pv_cache_list *pvcl;
+	const char *vgid;
+	int r = 1;
+
+	if (!lvmetad_active())
+		return 1;
+
+	log_debug_lvmetad("Asking lvmetad for complete list of known PVs");
+	reply = _lvmetad_send("pv_list", NULL);
+	if (!_lvmetad_handle_reply(reply, "list PVs", "", NULL)) {
+		log_error("lvmetad message failed.");
+		daemon_reply_destroy(reply);
+		return_0;
+	}
+
+	if ((cn = dm_config_find_node(reply.cft->root, "physical_volumes"))) {
+		for (cn = cn->child; cn; cn = cn->sib) {
+			if (!(pvcl = dm_pool_zalloc(cmd->mem, sizeof(*pvcl)))) {
+				log_error("pv_cache_list allocation failed.");
+				r = 0;
+				break;	/* leave via the common exit so the reply is destroyed */
+			}
+
+			/* The node key is the PV id in text form; skip entries that do not parse. */
+			if (!id_read_format(&pvcl->pvid, cn->key)) {
+				stack;
+				continue;
+			}
+
+			pvcl->devt = dm_config_find_int(cn->child, "device", 0);
+			if ((vgid = dm_config_find_str(cn->child, "vgid", NULL)))
+				pvcl->vgid = dm_pool_strdup(cmd->mem, vgid);
+
+			dm_list_add(pvc_list, &pvcl->list);
+		}
+	}
+
+	daemon_reply_destroy(reply);
+	return r;
+}
+
+/*
+ * Opening the device RDWR should trigger a udev db update.
+ * FIXME: is there a better way to update the udev db than
+ * doing an open/close of the device? - For example writing
+ * "change" to /sys/block/<device>/uevent?
+ */
+static void _update_pv_in_udev(struct cmd_context *cmd, dev_t devt)
+{
+	struct device *pv_dev;
+
+	log_debug_devs("device %d:%d open to update udev",
+		       (int)MAJOR(devt), (int)MINOR(devt));
+
+	/* Resolve the device number through the lvmetad filter. */
+	pv_dev = dev_cache_get_by_devt(devt, cmd->lvmetad_filter);
+	if (!pv_dev) {
+		log_error("_update_pv_in_udev no dev found");
+		return;
+	}
+
+	/* Nothing is read or written: the device is opened and closed again. */
+	if (!dev_open(pv_dev))
+		stack;
+	else if (!dev_close(pv_dev))
+		stack;
+}
+
+/*
+ * Compare before and after PV lists from before/after rescanning,
+ * and update udev db for changes.
+ *
+ * For PVs that have changed pvid or vgid in lvmetad from rescanning,
+ * there may be information in the udev database to update, so open
+ * these devices to trigger a udev update.
+ *
+ * "before" refers to the list of pvs from lvmetad before rescanning
+ * "after" refers to the list of pvs from lvmetad after rescanning
+ *
+ * Comparing both lists, we can see which PVs changed (pvid or vgid),
+ * and trigger a udev db update for those.
+ */
+static void _update_changed_pvs_in_udev(struct cmd_context *cmd,
+					struct dm_list *pvc_before,
+					struct dm_list *pvc_after)
+{
+	struct pv_cache_list *before;
+	struct pv_cache_list *after;
+	char id_before[ID_LEN + 1]  __attribute__((aligned(8)));
+	char id_after[ID_LEN + 1]  __attribute__((aligned(8)));
+	int found;
+	/* Pass 1: pair each "before" entry with an unclaimed "after" entry on the same devt. */
+	dm_list_iterate_items(before, pvc_before) {
+		found = 0;
+
+		dm_list_iterate_items(after, pvc_after) {
+			if (after->found)
+				continue;
+
+			if (before->devt != after->devt)
+				continue;
+
+			if (!id_equal(&before->pvid, &after->pvid)) {
+				memset(id_before, 0, sizeof(id_before));
+				memset(id_after, 0, sizeof(id_after));
+				strncpy(&id_before[0], (char *) &before->pvid, sizeof(id_before) - 1);
+				strncpy(&id_after[0], (char *) &after->pvid, sizeof(id_after) - 1);
+				/* Zero-filled, bounded copies so the ids can be logged with %s. */
+				log_debug_devs("device %d:%d changed pvid from %s to %s",
+					       (int)MAJOR(before->devt), (int)MINOR(before->devt),
+					       id_before, id_after);
+
+				before->update_udev = 1;
+
+			} else if ((before->vgid && !after->vgid) ||
+				   (after->vgid && !before->vgid) ||
+				   (before->vgid && after->vgid && strcmp(before->vgid, after->vgid))) {
+				/* Same pvid, but the vgid appeared, disappeared or differs. */
+				log_debug_devs("device %d:%d changed vg from %s to %s",
+					       (int)MAJOR(before->devt), (int)MINOR(before->devt),
+					       before->vgid ?: "none", after->vgid ?: "none");
+
+				before->update_udev = 1;
+			}
+			/* Claim the pair so this "after" entry cannot match another "before" entry. */
+			after->found = 1;
+			before->found = 1;
+			found = 1;
+			break;
+		}
+		/* No unclaimed "after" entry exists on this devt. */
+		if (!found) {
+			memset(id_before, 0, sizeof(id_before));
+			strncpy(&id_before[0], (char *) &before->pvid, sizeof(id_before) - 1);
+
+			log_debug_devs("device %d:%d pvid %s vg %s is gone",
+				       (int)MAJOR(before->devt), (int)MINOR(before->devt),
+				       id_before, before->vgid ? before->vgid : "none");
+
+			before->update_udev = 1;
+		}
+	}
+	/* Pass 2: reopen every flagged device to trigger the udev update. */
+	dm_list_iterate_items(before, pvc_before) {
+		if (before->update_udev)
+			_update_pv_in_udev(cmd, before->devt);
+	}
+	/* NOTE(review): nothing in this function sets update_udev on an "after" entry - confirm this loop can fire. */
+	dm_list_iterate_items(after, pvc_after) {
+		if (after->update_udev)
+			_update_pv_in_udev(cmd, after->devt);
+	}
+}
+
+/*
+ * Before this command was run, some external entity may have
+ * invalidated lvmetad's cache of global information, e.g. lvmlockd.
+ *
+ * The global information includes things like a new VG, a
+ * VG that was removed, the assignment of a PV to a VG;
+ * any change that is not isolated within a single VG.
+ *
+ * The external entity, like a lock manager, would invalidate
+ * the lvmetad global cache if it detected that the global
+ * information had been changed on disk by something other
+ * than a local lvm command, e.g. an lvm command on another
+ * host with access to the same devices.  (How it detects
+ * the change is specific to lock manager or other entity.)
+ *
+ * The effect is that metadata on disk is newer than the metadata
+ * in the local lvmetad daemon, and the local lvmetad's cache
+ * should be updated from disk before this command uses it.
+ *
+ * So, using this function, a command checks if lvmetad's global
+ * cache is valid.  If so, it does nothing.  If not, it rescans
+ * devices to update the lvmetad cache, then it notifies lvmetad
+ * that its cache is valid again (consistent with what's on disk.)
+ * This command can then go ahead and use the newly refreshed metadata.
+ *
+ * 1. Check if the lvmetad global cache is invalid.
+ * 2. If so, reread metadata from all devices and update the lvmetad cache.
+ * 3. Tell lvmetad that the global cache is now valid.
+ */
+
+void lvmetad_validate_global_cache(struct cmd_context *cmd, int force)
+{
+	struct dm_list pvc_before; /* pv_cache_list */
+	struct dm_list pvc_after; /* pv_cache_list */
+	daemon_reply reply;
+	int global_invalid;
+
+	dm_list_init(&pvc_before);
+	dm_list_init(&pvc_after);
+
+	if (!lvmlockd_use()) {
+		log_error(INTERNAL_ERROR "validate global cache without lvmlockd");
+		return;
+	}
+
+	if (!lvmetad_used())
+		return;
+
+	log_debug_lvmetad("Validating global lvmetad cache");
+	/* force: rescan without first asking lvmetad whether the cache is invalid. */
+	if (force)
+		goto do_scan;
+
+	reply = daemon_send_simple(_lvmetad, "get_global_info",
+				   "token = %s", "skip",
+				   NULL);
+	if (reply.error) {
+		log_error("lvmetad_validate_global_cache get_global_info error %d", reply.error);
+		daemon_reply_destroy(reply);	/* do not leak the reply when falling back to a rescan */
+		goto do_scan;
+	}
+
+	if (strcmp(daemon_reply_str(reply, "response", ""), "OK")) {
+		log_error("lvmetad_validate_global_cache get_global_info not ok");
+		daemon_reply_destroy(reply);	/* do not leak the reply when falling back to a rescan */
+		goto do_scan;
+	}
+
+	global_invalid = daemon_reply_int(reply, "global_invalid", -1);
+	daemon_reply_destroy(reply);
+
+	if (!global_invalid) {
+		/* cache is valid */
+		return;
+	}
+
+ do_scan:
+	/*
+	 * Save the current state of pvs from lvmetad so after devices are
+	 * scanned, we can compare to the new state to see if pvs changed.
+	 */
+	_lvmetad_get_pv_cache_list(cmd, &pvc_before);
+
+	/*
+	 * Update the local lvmetad cache so it correctly reflects any
+	 * changes made on remote hosts.
+	 */
+	lvmetad_pvscan_all_devs(cmd, NULL);
+
+	/*
+	 * Clear the global_invalid flag in lvmetad.
+	 * Subsequent local commands that read global state
+	 * from lvmetad will not see global_invalid until
+	 * another host makes another global change.
+	 */
+	reply = daemon_send_simple(_lvmetad, "set_global_info",
+				   "token = %s", "skip",
+				   "global_invalid = %d", 0,
+				   NULL);
+	if (reply.error)
+		log_error("lvmetad_validate_global_cache set_global_info error %d", reply.error);
+	/* As for get_global_info: only read the response when the exchange itself succeeded. */
+	else if (strcmp(daemon_reply_str(reply, "response", ""), "OK"))
+		log_error("lvmetad_validate_global_cache set_global_info not ok");
+
+	daemon_reply_destroy(reply);
+
+	/*
+	 * Populate this command's lvmcache structures from lvmetad.
+	 */
+	lvmcache_seed_infos_from_lvmetad(cmd);
+
+	/*
+	 * Update the local udev database to reflect PV changes from
+	 * other hosts.
+	 *
+	 * Compare the before and after PV lists, and if a PV's
+	 * pvid or vgid has changed, then open that device to trigger
+	 * a uevent to update the udev db.
+	 *
+	 * This has no direct benefit to lvm, but is just a best effort
+	 * attempt to keep the udev db updated and reflecting current
+	 * lvm information.
+	 *
+	 * FIXME: lvmcache_seed_infos_from_lvmetad() and _lvmetad_get_pv_cache_list()
+	 * each get pv_list from lvmetad, and they could share a single pv_list reply.
+	 */
+	if (!dm_list_empty(&pvc_before)) {
+		_lvmetad_get_pv_cache_list(cmd, &pvc_after);
+		_update_changed_pvs_in_udev(cmd, &pvc_before, &pvc_after);
+	}
+}
--- a/lib/cache/lvmetad.h
+++ b/lib/cache/lvmetad.h
@@ -29,15 +29,14 @@ typedef int (*activation_handler) (struct cmd_context *cmd,

 #ifdef LVMETAD_SUPPORT
 /*
- * Initialise the communication with lvmetad. Normally called by
- * lvmcache_init. Sets up a global handle for our process.
+ * Sets up a global handle for our process.
 */
 void lvmetad_init(struct cmd_context *);

 /*
 * Override the use of lvmetad for retrieving scan results and metadata.
 */
-void lvmetad_set_active(int);
+void lvmetad_set_active(struct cmd_context *, int);

 /*
 * Configure the socket that lvmetad_init will use to connect to the daemon.
@@ -59,7 +58,9 @@ int lvmetad_socket_present(void);

 /*
 * Check whether lvmetad is active (where active means both that it is running
- * and that we have a working connection with it).
+ * and that we have a working connection with it). As a side effect, it opens
+ * a new connection to lvmetad when lvmetad is supposed to be used and the
+ * connection is not open yet.
 */
 int lvmetad_active(void);

@@ -70,8 +71,9 @@ int lvmetad_active(void);
 void lvmetad_connect_or_warn(void);

 /*
- * Drop connection to lvmetad. A subsequent lvmetad_init() will re-establish
- * the connection (possibly at a different socket path).
+ * Drop connection to lvmetad. A subsequent lvmetad_connect_or_warn or
+ * lvmetad_active will re-establish the connection (possibly at a
+ * different socket path).
 */
 void lvmetad_disconnect(void);

@@ -142,6 +144,12 @@ int lvmetad_pv_lookup_by_dev(struct cmd_context *cmd, struct device *dev, int *f
 */
 int lvmetad_vg_list_to_lvmcache(struct cmd_context *cmd);

+/*
+ * Request a list of vgid/vgname pairs for all VGs known to lvmetad.
+ * Does not do vg_lookup's on each VG, and does not populate lvmcache.
+ */
+int lvmetad_get_vgnameids(struct cmd_context *cmd, struct dm_list *vgnameids);
+
 /*
 * Find a VG by its ID or its name in the lvmetad cache. Gives NULL if the VG is
 * not found.
@@ -153,15 +161,19 @@ struct volume_group *lvmetad_vg_lookup(struct cmd_context *cmd,
 * Scan a single device and update lvmetad with the result(s).
 */
 int lvmetad_pvscan_single(struct cmd_context *cmd, struct device *dev,
-			  activation_handler handler);
+			  activation_handler handler, int ignore_obsolete);

 int lvmetad_pvscan_all_devs(struct cmd_context *cmd, activation_handler handler);
+int lvmetad_pvscan_foreign_vgs(struct cmd_context *cmd, activation_handler handler);
+
+int lvmetad_vg_clear_outdated_pvs(struct volume_group *vg);
+void lvmetad_validate_global_cache(struct cmd_context *cmd, int force);

 #  else		/* LVMETAD_SUPPORT */

 #    define lvmetad_init(cmd)	do { } while (0)
 #    define lvmetad_disconnect()	do { } while (0)
-#    define lvmetad_set_active(a)	do { } while (0)
+#    define lvmetad_set_active(cmd, a)	do { } while (0)
 #    define lvmetad_set_socket(a)	do { } while (0)
 #    define lvmetad_used()	(0)
 #    define lvmetad_socket_present()	(0)
@@ -178,9 +190,13 @@ int lvmetad_pvscan_all_devs(struct cmd_context *cmd, activation_handler handler)
 #    define lvmetad_pv_lookup(cmd, pvid, found)	(0)
 #    define lvmetad_pv_lookup_by_dev(cmd, dev, found)	(0)
 #    define lvmetad_vg_list_to_lvmcache(cmd)	(1)
+#    define lvmetad_get_vgnameids(cmd, vgnameids)       do { } while (0)
 #    define lvmetad_vg_lookup(cmd, vgname, vgid)	(NULL)
-#    define lvmetad_pvscan_single(cmd, dev, handler)	(0)
+#    define lvmetad_pvscan_single(cmd, dev, handler, ignore_obsolete)	(0)
 #    define lvmetad_pvscan_all_devs(cmd, handler)	(0)
+#    define lvmetad_pvscan_foreign_vgs(cmd, handler)	(0)
+#    define lvmetad_vg_clear_outdated_pvs(vg)           (1)
+#    define lvmetad_validate_global_cache(cmd, force)	do { } while (0)

 #  endif	/* LVMETAD_SUPPORT */

--- a/lib/cache_segtype/cache.c
+++ b/lib/cache_segtype/cache.c
@@ -19,7 +19,6 @@
 #include "text_export.h"
 #include "config.h"
 #include "str_list.h"
-#include "targets.h"
 #include "lvm-string.h"
 #include "activate.h"
 #include "metadata.h"
@@ -31,20 +30,13 @@
                  dm_config_parent_name(sn), seg->lv->name), 0;


-static const char *_name(const struct lv_segment *seg)
-{
-	return seg->segtype->name;
-}
-
 static int _cache_pool_text_import(struct lv_segment *seg,
 				   const struct dm_config_node *sn,
 				   struct dm_hash_table *pv_hash __attribute__((unused)))
 {
-	uint32_t chunk_size;
 	struct logical_volume *data_lv, *meta_lv;
 	const char *str = NULL;
-	char *argv_str;
-	struct dm_pool *mem = seg->lv->vg->vgmem; //FIXME: what mempool should be used?
+	struct dm_pool *mem = seg->lv->vg->vgmem;

 	if (!dm_config_has_node(sn, "data"))
 		return SEG_LOG_ERROR("Cache data not specified in");
@@ -52,7 +44,7 @@ static int _cache_pool_text_import(struct lv_segment *seg,
 		return SEG_LOG_ERROR("Cache data must be a string in");
 	if (!(data_lv = find_lv(seg->lv->vg, str)))
 		return SEG_LOG_ERROR("Unknown logical volume %s specified for "
-			  "cache data in", str);
+				     "cache data in", str);

 	if (!dm_config_has_node(sn, "metadata"))
 		return SEG_LOG_ERROR("Cache metadata not specified in");
@@ -60,101 +52,68 @@ static int _cache_pool_text_import(struct lv_segment *seg,
 		return SEG_LOG_ERROR("Cache metadata must be a string in");
 	if (!(meta_lv = find_lv(seg->lv->vg, str)))
 		return SEG_LOG_ERROR("Unknown logical volume %s specified for "
-			  "cache metadata in", str);
+				     "cache metadata in", str);

-	if (!dm_config_get_uint32(sn, "chunk_size", &chunk_size))
+	if (!dm_config_get_uint32(sn, "chunk_size", &seg->chunk_size))
 		return SEG_LOG_ERROR("Couldn't read cache chunk_size in");

 	/*
 	 * Read in features:
-	 *   cache_mode = {writethrough|writeback}
+	 *   cache_mode = {passthrough|writethrough|writeback}
 	 *
 	 *   'cache_mode' does not have to be present.
 	 */
 	if (dm_config_has_node(sn, "cache_mode")) {
 		if (!(str = dm_config_find_str(sn, "cache_mode", NULL)))
 			return SEG_LOG_ERROR("cache_mode must be a string in");
-		if (!get_cache_mode(str, &seg->feature_flags))
+		if (!set_cache_pool_feature(&seg->feature_flags, str))
 			return SEG_LOG_ERROR("Unknown cache_mode in");
+	} else
+		/* When missing from the metadata, it is old metadata - use writethrough */
+		seg->feature_flags |= DM_CACHE_FEATURE_WRITETHROUGH;
+
+	if (dm_config_has_node(sn, "policy")) {
+		if (!(str = dm_config_find_str(sn, "policy", NULL)))
+			return SEG_LOG_ERROR("policy must be a string in");
+		if (!(seg->policy_name = dm_pool_strdup(mem, str)))
+			return SEG_LOG_ERROR("Failed to duplicate policy in");
+	} else {
+		/* Cannot use 'just' default, so pick one */
+		seg->policy_name = DEFAULT_CACHE_POOL_POLICY; /* FIXME make configurable */
+		/* FIXME maybe here should be always 'mq' */
+		log_warn("WARNING: cache_policy undefined, using default \"%s\" policy.",
+			 seg->policy_name);
 	}

 	/*
-	 * Read in core arguments (these are key/value pairs)
-	 *   core_argc = <# args>
-	 *   core_argv = "[<key> <value>]..."
+	 * Read in policy args:
+	 *   policy_settings {
+	 *	migration_threshold=2048
+	 *	sequential_threshold=100
+	 *	random_threshold=200
+	 *	read_promote_adjustment=10
+	 *	write_promote_adjustment=20
+	 *	discard_promote_adjustment=40
 	 *
-	 *   'core_argc' does not have to be present.  If it is not present,
-	 *   any other core_* fields are ignored.  If it is present, then
-	 *   'core_argv' must be present - even if they are
-	 *   'core_argc = 0' and 'core_argv = ""'.
-	 */
-	if (dm_config_has_node(sn, "core_argc")) {
-		if (!dm_config_has_node(sn, "core_argv"))
-			return SEG_LOG_ERROR("not all core arguments defined in");
-
-		if (!dm_config_get_uint32(sn, "core_argc", &seg->core_argc))
-			return SEG_LOG_ERROR("Unable to read core_argc in");
-
-		str = dm_config_find_str(sn, "core_argv", NULL);
-		if ((str && !seg->core_argc) || (!str && seg->core_argc))
-			return SEG_LOG_ERROR("core_argc and core_argv do"
-					     " not match in");
-
-		if (!(seg->core_argv =
-		      dm_pool_alloc(mem, sizeof(char *) * seg->core_argc)))
-			return_0;
-		if (str &&
-		    (!(argv_str = dm_pool_strdup(mem, str)) ||
-		     ((int)seg->core_argc != dm_split_words(argv_str, seg->core_argc,
-							    0, (char **) seg->core_argv))))
-			return SEG_LOG_ERROR("core_argc and core_argv do"
-					     " not match in");
-	}
-
-	/*
-	 * Read in policy:
-	 *   policy_name = "<policy_name>"
-	 *   policy_argc = <# args>
-	 *   policy_argv = "[<key> <value>]..."
+	 *	<key> = <value>
+	 *	<key> = <value>
+	 *	...
+	 *   }
 	 *
-	 *   'policy_name' does not have to be present.  If it is not present,
-	 *   any other policy_* fields are ignored.  If it is present, then
-	 *   the other policy_* fields must be present - even if they are
-	 *   'policy_argc = 0' and 'policy_argv = ""'.
+	 *   If the policy is not present, default policy is used.
 	 */
-	if (dm_config_has_node(sn, "policy_name")) {
-		if (!dm_config_has_node(sn, "policy_argc") ||
-		    !dm_config_has_node(sn, "policy_argv"))
-			return SEG_LOG_ERROR("not all policy arguments defined in");
-		if (!(str = dm_config_find_str(sn, "policy_name", NULL)))
-			return SEG_LOG_ERROR("policy_name must be a string in");
-		seg->policy_name = dm_pool_strdup(mem, str);
+	if ((sn = dm_config_find_node(sn, "policy_settings"))) {
+		if (sn->v)
+			return SEG_LOG_ERROR("policy_settings must be a section in");

-		if (!dm_config_get_uint32(sn, "policy_argc", &seg->policy_argc))
-			return SEG_LOG_ERROR("Unable to read policy_argc in");
-
-		str = dm_config_find_str(sn, "policy_argv", NULL);
-		if ((str && !seg->policy_argc) || (!str && seg->policy_argc))
-			return SEG_LOG_ERROR("policy_argc and policy_argv do"
-					     " not match in");
-
-		if (!(seg->policy_argv =
-		      dm_pool_alloc(mem, sizeof(char *) * seg->policy_argc)))
+		if (!(seg->policy_settings = dm_config_clone_node_with_mem(mem, sn, 0)))
 			return_0;
-		if (str &&
-		    (!(argv_str = dm_pool_strdup(mem, str)) ||
-		     ((int)seg->policy_argc != dm_split_words(argv_str,
-							      seg->policy_argc,
-							      0, (char **) seg->policy_argv))))
-			return SEG_LOG_ERROR("policy_argc and policy_argv do"
-					     " not match in");
 	}

 	if (!attach_pool_data_lv(seg, data_lv))
 		return_0;
 	if (!attach_pool_metadata_lv(seg, meta_lv))
 		return_0;
-	seg->chunk_size = chunk_size;

 	return 1;
 }
@@ -170,43 +129,30 @@ static int _cache_pool_text_import_area_count(const struct dm_config_node *sn,
 static int _cache_pool_text_export(const struct lv_segment *seg,
 				   struct formatter *f)
 {
-	unsigned i;
-	char buf[256]; //FIXME: IS THERE AN 'outf' THAT DOESN'T DO NEWLINE?!?
-	uint32_t feature_flags = seg->feature_flags;
+	const char *cache_mode;
+
+	if (!(cache_mode = get_cache_pool_cachemode_name(seg)))
+		return_0;
+
+	if (!seg->policy_name) {
+		log_error(INTERNAL_ERROR "Policy name for %s is not defined.",
+			  display_lvname(seg->lv));
+		return 0;
+	}

 	outf(f, "data = \"%s\"", seg_lv(seg, 0)->name);
 	outf(f, "metadata = \"%s\"", seg->metadata_lv->name);
 	outf(f, "chunk_size = %" PRIu32, seg->chunk_size);
+	outf(f, "cache_mode = \"%s\"", cache_mode);
+	outf(f, "policy = \"%s\"", seg->policy_name);

-	if (feature_flags) {
-		if (feature_flags & DM_CACHE_FEATURE_WRITETHROUGH) {
-			outf(f, "cache_mode = \"writethrough\"");
-			feature_flags &= ~DM_CACHE_FEATURE_WRITETHROUGH;
-		} else if (feature_flags & DM_CACHE_FEATURE_WRITEBACK) {
-			outf(f, "cache_mode = \"writeback\"");
-			feature_flags &= ~DM_CACHE_FEATURE_WRITEBACK;
-		} else {
-			log_error(INTERNAL_ERROR "Unknown feature flags "
-				  "in cache_pool segment for %s", seg->lv->name);
+	if (seg->policy_settings) {
+		if (strcmp(seg->policy_settings->key, "policy_settings")) {
+			log_error(INTERNAL_ERROR "Incorrect policy_settings tree, %s.",
+				  seg->policy_settings->key);
 			return 0;
 		}
-	}
-
-	if (seg->core_argc) {
-		outf(f, "core_argc = %u", seg->core_argc);
-		outf(f, "core_argv = \"");
-		for (i = 0; i < seg->core_argc; i++)
-			outf(f, "%s%s", i ? " " : "", seg->core_argv[i]);
-		outf(f, "\"");
-	}
-
-	if (seg->policy_name) {
-		outf(f, "policy_name = \"%s\"", seg->policy_name);
-		outf(f, "policy_argc = %u", seg->policy_argc);
-		buf[0] = '\0';
-		for (i = 0; i < seg->policy_argc; i++)
-			sprintf(buf, "%s%s", i ? " " : "", seg->policy_argv[i]);
-		outf(f, "policy_argv = \"%s\"", buf);
+		out_config_node(f, seg->policy_settings);
 	}

 	return 1;
@@ -262,7 +208,6 @@ static int _modules_needed(struct dm_pool *mem,
 #endif /* DEVMAPPER_SUPPORT */

 static struct segtype_handler _cache_pool_ops = {
-	.name = _name,
 	.text_import = _cache_pool_text_import,
 	.text_import_area_count = _cache_pool_text_import_area_count,
 	.text_export = _cache_pool_text_export,
@@ -280,7 +225,7 @@ static int _cache_text_import(struct lv_segment *seg,
 			      struct dm_hash_table *pv_hash __attribute__((unused)))
 {
 	struct logical_volume *pool_lv, *origin_lv;
-	const char *name = NULL;
+	const char *name;

 	if (!dm_config_has_node(sn, "cache_pool"))
 		return SEG_LOG_ERROR("cache_pool not specified in");
@@ -288,7 +233,7 @@ static int _cache_text_import(struct lv_segment *seg,
 		return SEG_LOG_ERROR("cache_pool must be a string in");
 	if (!(pool_lv = find_lv(seg->lv->vg, name)))
 		return SEG_LOG_ERROR("Unknown logical volume %s specified for "
-			  "cache_pool in", name);
+				     "cache_pool in", name);

 	if (!dm_config_has_node(sn, "origin"))
 		return SEG_LOG_ERROR("Cache origin not specified in");
@@ -296,10 +241,17 @@ static int _cache_text_import(struct lv_segment *seg,
 		return SEG_LOG_ERROR("Cache origin must be a string in");
 	if (!(origin_lv = find_lv(seg->lv->vg, name)))
 		return SEG_LOG_ERROR("Unknown logical volume %s specified for "
-			  "cache origin in", name);
-
+				     "cache origin in", name);
 	if (!set_lv_segment_area_lv(seg, 0, origin_lv, 0, 0))
 		return_0;
+
+	seg->cleaner_policy = 0;
+	if (dm_config_has_node(sn, "cleaner") &&
+	    !dm_config_get_uint32(sn, "cleaner", &seg->cleaner_policy))
+		return SEG_LOG_ERROR("Could not read cache cleaner in");
+
+	seg->lv->status |= strstr(seg->lv->name, "_corig") ? LV_PENDING_DELETE : 0;
+
 	if (!attach_pool_lv(seg, pool_lv, NULL, NULL))
 		return_0;

@@ -322,6 +274,9 @@ static int _cache_text_export(const struct lv_segment *seg, struct formatter *f)
 	outf(f, "cache_pool = \"%s\"", seg->pool_lv->name);
 	outf(f, "origin = \"%s\"", seg_lv(seg, 0)->name);

+	if (seg->cleaner_policy)
+		outf(f, "cleaner = 1");
+
 	return 1;
 }

@@ -335,9 +290,16 @@ static int _cache_add_target_line(struct dev_manager *dm,
 				 struct dm_tree_node *node, uint64_t len,
 				 uint32_t *pvmove_mirror_count __attribute__((unused)))
 {
-	struct lv_segment *cache_pool_seg = first_seg(seg->pool_lv);
+	struct lv_segment *cache_pool_seg;
 	char *metadata_uuid, *data_uuid, *origin_uuid;

+	if (!seg->pool_lv || !seg_is_cache(seg)) {
+		log_error(INTERNAL_ERROR "Passed segment is not cache.");
+		return 0;
+	}
+
+	cache_pool_seg = first_seg(seg->pool_lv);
+
 	if (!(metadata_uuid = build_dm_uuid(mem, cache_pool_seg->metadata_lv, NULL)))
 		return_0;

@@ -348,24 +310,20 @@ static int _cache_add_target_line(struct dev_manager *dm,
 		return_0;

 	if (!dm_tree_node_add_cache_target(node, len,
+					   cache_pool_seg->feature_flags,
 					   metadata_uuid,
 					   data_uuid,
 					   origin_uuid,
-					   cache_pool_seg->chunk_size,
-					   cache_pool_seg->feature_flags,
-					   cache_pool_seg->core_argc,
-					   cache_pool_seg->core_argv,
-					   cache_pool_seg->policy_name,
-					   cache_pool_seg->policy_argc,
-					   cache_pool_seg->policy_argv))
+					   seg->cleaner_policy ? "cleaner" : cache_pool_seg->policy_name,
+					   seg->cleaner_policy ? NULL : cache_pool_seg->policy_settings,
+					   cache_pool_seg->chunk_size))
 		return_0;

-	return add_areas_line(dm, seg, node, 0u, seg->area_count);
+	return 1;
 }
 #endif /* DEVMAPPER_SUPPORT */

 static struct segtype_handler _cache_ops = {
-	.name = _name,
 	.text_import = _cache_text_import,
 	.text_import_area_count = _cache_text_import_area_count,
 	.text_export = _cache_text_export,
@@ -395,12 +353,10 @@ int init_cache_segtypes(struct cmd_context *cmd,
 		log_error("Failed to allocate memory for cache_pool segtype");
 		return 0;
 	}
-	segtype->cmd = cmd;

 	segtype->name = "cache-pool";
-	segtype->flags = SEG_CACHE_POOL;
+	segtype->flags = SEG_CACHE_POOL | SEG_CANNOT_BE_ZEROED | SEG_ONLY_EXCLUSIVE;
 	segtype->ops = &_cache_pool_ops;
-	segtype->private = NULL;

 	if (!lvm_register_segtype(seglib, segtype))
 		return_0;
@@ -411,12 +367,10 @@ int init_cache_segtypes(struct cmd_context *cmd,
 		log_error("Failed to allocate memory for cache segtype");
 		return 0;
 	}
-	segtype->cmd = cmd;

 	segtype->name = "cache";
-	segtype->flags = SEG_CACHE;
+	segtype->flags = SEG_CACHE | SEG_ONLY_EXCLUSIVE;
 	segtype->ops = &_cache_ops;
-	segtype->private = NULL;

 	if (!lvm_register_segtype(seglib, segtype))
 		return_0;
--- a/lib/commands/toolcontext.c
+++ b/lib/commands/toolcontext.c
@@ -30,6 +30,7 @@
 #include "lvmcache.h"
 #include "lvmetad.h"
 #include "archiver.h"
+#include "lvmpolld-client.h"

 #ifdef HAVE_LIBDL
 #include "sharedlib.h"
@@ -55,6 +56,128 @@

 static const size_t linebuffer_size = 4096;

+/*
+ * Copy the input string, removing invalid characters.  Returns "" for NULL/empty input and NULL on any other failure; each case logs a warning.
+ */
+const char *system_id_from_string(struct cmd_context *cmd, const char *str)
+{
+	char *system_id;
+
+	if (!str || !*str) {
+		log_warn("WARNING: Empty system ID supplied.");
+		return "";
+	}
+	/* Zeroed buffer from cmd->libmem, sized to hold all of str. */
+	if (!(system_id = dm_pool_zalloc(cmd->libmem, strlen(str) + 1))) {
+		log_warn("WARNING: Failed to allocate system ID.");
+		return NULL;
+	}
+
+	copy_systemid_chars(str, system_id);
+	/* An empty result means copy_systemid_chars() kept nothing from str. */
+	if (!*system_id) {
+		log_warn("WARNING: Invalid system ID format: %s", str);
+		return NULL;
+	}
+	/* Prefix test: only the first 9 characters ("localhost") are compared. */
+	if (!strncmp(system_id, "localhost", 9)) {
+		log_warn("WARNING: system ID may not begin with the string \"localhost\".");
+		return NULL;
+	}
+
+	return system_id;
+}
+
+static const char *_read_system_id_from_file(struct cmd_context *cmd, const char *file)
+{
+	char *line = NULL;
+	size_t line_size = 0;	/* getline() allocates and grows the buffer */
+	char *start, *end;
+	const char *system_id = NULL;	/* stays NULL when no usable line is found */
+	FILE *fp;
+
+	if (!file || !*file)
+		return_NULL;
+
+	if (!(fp = fopen(file, "r"))) {
+		log_warn("WARNING: %s: fopen failed: %s", file, strerror(errno));
+		return NULL;
+	}
+	/* Use the first line that yields a system ID; warn if more content follows it. */
+	while (getline(&line, &line_size, fp) > 0) {
+		start = line;
+
+		/* Ignore leading whitespace (cast: isspace() needs an unsigned char value) */
+		while (*start && isspace((unsigned char) *start))
+			start++;
+
+		/* Skip blank lines and lines holding only a comment */
+		if (!*start || *start == '#')
+			continue;
+
+		if (system_id && *system_id) {
+			log_warn("WARNING: Ignoring extra line(s) in system ID file %s.", file);
+			break;
+		}
+
+		/* Remove any comments from end of line */
+		for (end = start; *end; end++)
+			if (*end == '#') {
+				*end = '\0';
+				break;
+			}
+
+		system_id = system_id_from_string(cmd, start);
+	}
+
+	free(line);
+
+	if (fclose(fp))
+		stack;
+
+	return system_id;
+}
+
+static const char *_system_id_from_source(struct cmd_context *cmd, const char *source)
+{
+	char path[PATH_MAX];
+	const char *value;
+
+	/*
+	 * Every recognised source returns its own result directly; NULL means
+	 * that source yielded no system ID.
+	 */
+	if (!strcasecmp(source, "uname")) {
+		if (!cmd->hostname)
+			return NULL;
+		return system_id_from_string(cmd, cmd->hostname);
+	}
+
+	/* lvm.conf and lvmlocal.conf are merged into one config tree */
+	if (!strcasecmp(source, "lvmlocal")) {
+		value = find_config_tree_str(cmd, local_system_id_CFG, NULL);
+		return value ? system_id_from_string(cmd, value) : NULL;
+	}
+
+	/* Both spellings read the machine-id file under the configured etc directory. */
+	if (!strcasecmp(source, "machineid") || !strcasecmp(source, "machine-id")) {
+		value = find_config_tree_str(cmd, global_etc_CFG, NULL);
+		if (dm_snprintf(path, sizeof(path), "%s/machine-id", value) == -1)
+			return NULL;
+		return _read_system_id_from_file(cmd, path);
+	}
+
+	/* The file to read is named by global_system_id_file_CFG. */
+	if (!strcasecmp(source, "file")) {
+		value = find_config_tree_str(cmd, global_system_id_file_CFG, NULL);
+		return _read_system_id_from_file(cmd, value);
+	}
+
+	log_warn("WARNING: Unrecognised system_id_source \"%s\".", source);
+
+	return NULL;
+}
+
 static int _get_env_vars(struct cmd_context *cmd)
 {
 	const char *e;
@@ -122,8 +245,10 @@ static int _parse_debug_classes(struct cmd_context *cmd)
 	const struct dm_config_value *cv;
 	int debug_classes = 0;

-	if (!(cn = find_config_tree_node(cmd, log_debug_classes_CFG, NULL)))
-		return DEFAULT_LOGGED_DEBUG_CLASSES;
+	if (!(cn = find_config_tree_array(cmd, log_debug_classes_CFG, NULL))) {
+		log_error(INTERNAL_ERROR "Unable to find configuration for log/debug_classes.");
+		return -1;
+	}

 	for (cv = cn->v; cv; cv = cv->next) {
 		if (cv->type != DM_CFG_STRING) {
@@ -151,6 +276,8 @@ static int _parse_debug_classes(struct cmd_context *cmd)
 			debug_classes |= LOG_CLASS_CACHE;
 		else if (!strcasecmp(cv->v.str, "locking"))
 			debug_classes |= LOG_CLASS_LOCKING;
+		else if (!strcasecmp(cv->v.str, "lvmpolld"))
+			debug_classes |= LOG_CLASS_LVMPOLLD;
 		else
 			log_verbose("Unrecognised value for log/debug_classes: %s", cv->v.str);
 	}
@@ -288,7 +415,59 @@ static int _check_config(struct cmd_context *cmd)
 	return 1;
 }

-int process_profilable_config(struct cmd_context *cmd) {
+/*
+ * Validate report/time_format.  Returns the configured string, or NULL (after
+ * logging an error) if it is empty, non-printable or uses a disallowed conversion.
+ */
+static const char *_set_time_format(struct cmd_context *cmd)
+{
+	/* Compared to strftime, we do not allow "newline" character - the %n in format. */
+	static const char *allowed_format_chars = "aAbBcCdDeFGghHIjklmMpPrRsStTuUVwWxXyYzZ%";
+	static const char *allowed_alternative_format_chars_e = "cCxXyY";
+	static const char *allowed_alternative_format_chars_o = "deHImMSuUVwWy";
+	const char *chars_to_check;
+	const char *tf = find_config_tree_str(cmd, report_time_format_CFG, NULL);
+	const char *p_fmt;
+	size_t i;
+	char c;
+
+	if (!*tf) {
+		log_error("Configured time format is empty string.");
+		goto bad;
+	}
+	p_fmt = tf;
+	while ((c = *p_fmt)) {
+		if (c == '%') {
+			c = *++p_fmt;
+			if (c == 'E') {
+				c = *++p_fmt;
+				chars_to_check = allowed_alternative_format_chars_e;
+			} else if (c == 'O') {
+				c = *++p_fmt;
+				chars_to_check = allowed_alternative_format_chars_o;
+			} else
+				chars_to_check = allowed_format_chars;
+			for (i = 0; chars_to_check[i]; i++) {
+				if (c == chars_to_check[i])
+					break;
+			}
+			if (!chars_to_check[i])	/* also true when the string ends right after '%' */
+				goto_bad;
+		} else if (!isprint((unsigned char) c)) {	/* cast: plain char may be negative */
+			log_error("Configured time format contains non-printable characters.");
+			goto bad;
+		}
+		p_fmt++;	/* step past the char just handled, so the 2nd '%' of "%%" is not re-read as a new conversion */
+	}
+
+	return tf;
+bad:
+	log_error("Invalid time format \"%s\" supplied.", tf);
+	return NULL;
+}
+
+int process_profilable_config(struct cmd_context *cmd)
+{
 	if (!(cmd->default_settings.unit_factor =
 	      dm_units_to_factor(find_config_tree_str(cmd, global_units_CFG, NULL),
 				 &cmd->default_settings.unit_type, 1, NULL))) {
@@ -300,6 +479,46 @@ int process_profilable_config(struct cmd_context *cmd) {
 	cmd->report_binary_values_as_numeric = find_config_tree_bool(cmd, report_binary_values_as_numeric_CFG, NULL);
 	cmd->default_settings.suffix = find_config_tree_bool(cmd, global_suffix_CFG, NULL);
 	cmd->report_list_item_separator = find_config_tree_str(cmd, report_list_item_separator_CFG, NULL);
+	if (!(cmd->time_format = _set_time_format(cmd)))
+		return 0;
+
+	return 1;
+}
+
+/* Resolve cmd->system_id from the configured system_id_source.  Every path visible here returns 1. */
+static int _init_system_id(struct cmd_context *cmd)
+{
+	const char *source, *system_id;
+	int local_set = 0;
+
+	cmd->system_id = NULL;
+	cmd->unknown_system_id = 0;
+	/* This lookup only detects whether a non-empty local system_id is configured. */
+	system_id = find_config_tree_str_allow_empty(cmd, local_system_id_CFG, NULL);
+	if (system_id && *system_id)
+		local_set = 1;
+
+	source = find_config_tree_str(cmd, global_system_id_source_CFG, NULL);
+	if (!source)
+		source = "none";
+	/* Defining local system_id but not using it is probably a config mistake. */
+	if (local_set && strcmp(source, "lvmlocal"))
+		log_warn("WARNING: local/system_id is set, so should global/system_id_source be \"lvmlocal\" not \"%s\"?", source);
+	/* NOTE(review): "lvmlocal"/"none" are compared case-sensitively here, but _system_id_from_source() uses strcasecmp() - confirm intended. */
+	if (!strcmp(source, "none"))
+		return 1;
+
+	if ((system_id = _system_id_from_source(cmd, source)) && *system_id) {
+		cmd->system_id = system_id;
+		return 1;
+	}
+
+	/*
+	 * The source failed to resolve a system_id.  In this case allow
+	 * VGs with no system_id to be accessed, but not VGs with a system_id.
+	 */
+	log_warn("WARNING: No system ID found from system_id_source %s.", source);
+	cmd->unknown_system_id = 1;
 
 	return 1;
 }
@@ -307,12 +526,12 @@ int process_profilable_config(struct cmd_context *cmd) {
 static int _process_config(struct cmd_context *cmd)
 {
 	mode_t old_umask;
+	const char *dev_ext_info_src;
 	const char *read_ahead;
 	struct stat st;
 	const struct dm_config_node *cn;
 	const struct dm_config_value *cv;
 	int64_t pv_min_kb;
-	const char *lvmetad_socket;
 	int udev_disabled = 0;
 	char sysfs_dir[PATH_MAX];

@@ -340,6 +559,16 @@ static int _process_config(struct cmd_context *cmd)
 		return_0;
 #endif

+	dev_ext_info_src = find_config_tree_str(cmd, devices_external_device_info_source_CFG, NULL);
+	if (!strcmp(dev_ext_info_src, "none"))
+		init_external_device_info_source(DEV_EXT_NONE);
+	else if (!strcmp(dev_ext_info_src, "udev"))
+		init_external_device_info_source(DEV_EXT_UDEV);
+	else {
+		log_error("Invalid external device info source specification.");
+		return 0;
+	}
+
 	/* proc dir */
 	if (dm_snprintf(cmd->proc_dir, sizeof(cmd->proc_dir), "%s",
 			 find_config_tree_str(cmd, global_proc_CFG, NULL)) < 0) {
@@ -423,7 +652,7 @@ static int _process_config(struct cmd_context *cmd)
 		}
 	}

-	if ((cn = find_config_tree_node(cmd, activation_mlock_filter_CFG, NULL)))
+	if ((cn = find_config_tree_array(cmd, activation_mlock_filter_CFG, NULL)))
 		for (cv = cn->v; cv; cv = cv->next) 
 			if ((cv->type != DM_CFG_STRING) || !cv->v.str[0]) 
 				log_error("Ignoring invalid activation/mlock_filter entry in config file");
@@ -445,29 +674,8 @@ static int _process_config(struct cmd_context *cmd)
 	init_detect_internal_vg_cache_corruption
 		(find_config_tree_bool(cmd, global_detect_internal_vg_cache_corruption_CFG, NULL));

-	lvmetad_disconnect();
-
-	lvmetad_socket = getenv("LVM_LVMETAD_SOCKET");
-	if (!lvmetad_socket)
-		lvmetad_socket = DEFAULT_RUN_DIR "/lvmetad.socket";
-
-	/* TODO?
-		lvmetad_socket = find_config_tree_str(cmd, "lvmetad/socket_path",
-						      DEFAULT_RUN_DIR "/lvmetad.socket");
-	*/
-	lvmetad_set_socket(lvmetad_socket);
-	cn = find_config_tree_node(cmd, devices_global_filter_CFG, NULL);
-	lvmetad_set_token(cn ? cn->v : NULL);
-
-	if (find_config_tree_int(cmd, global_locking_type_CFG, NULL) == 3 &&
-	    find_config_tree_bool(cmd, global_use_lvmetad_CFG, NULL)) {
-		log_warn("WARNING: configuration setting use_lvmetad overridden to 0 due to locking_type 3. "
-			 "Clustered environment not supported by lvmetad yet.");
-		lvmetad_set_active(0);
-	} else
-		lvmetad_set_active(find_config_tree_bool(cmd, global_use_lvmetad_CFG, NULL));
-
-	lvmetad_init(cmd);
+	if (!_init_system_id(cmd))
+		return_0;

 	return 1;
 }
@@ -526,11 +734,12 @@ static int _init_tags(struct cmd_context *cmd, struct dm_config_tree *cft)
 	const char *tag;
 	int passes;

-	if (!(tn = find_config_tree_node(cmd, tags_CFG_SECTION, NULL)) || !tn->child)
+	/* Access tags section directly */
+	if (!(tn = find_config_node(cmd, cft, tags_CFG_SECTION)) || !tn->child)
 		return 1;

 	/* NB hosttags 0 when already 1 intentionally does not delete the tag */
-	if (!cmd->hosttags && find_config_tree_bool(cmd, tags_hosttags_CFG, NULL)) {
+	if (!cmd->hosttags && find_config_bool(cmd, cft, tags_hosttags_CFG)) {
 		/* FIXME Strip out invalid chars: only A-Za-z0-9_+.- */
 		if (!_set_tag(cmd, cmd->hostname))
 			return_0;
@@ -561,7 +770,7 @@ static int _init_tags(struct cmd_context *cmd, struct dm_config_tree *cft)
 	return 1;
 }

-static int _load_config_file(struct cmd_context *cmd, const char *tag)
+static int _load_config_file(struct cmd_context *cmd, const char *tag, int local)
 {
 	static char config_file[PATH_MAX] = "";
 	const char *filler = "";
@@ -569,6 +778,10 @@ static int _load_config_file(struct cmd_context *cmd, const char *tag)

 	if (*tag)
 		filler = "_";
+	else if (local) {
+		filler = "";
+		tag = "local";
+	}

 	if (dm_snprintf(config_file, sizeof(config_file), "%s/lvm%s%s.conf",
 			 cmd->system_dir, filler, tag) < 0) {
@@ -596,7 +809,9 @@ static int _load_config_file(struct cmd_context *cmd, const char *tag)
 	return 1;
 }

-/* Find and read first config file */
+/*
+ * Find and read lvm.conf.
+ */
 static int _init_lvm_conf(struct cmd_context *cmd)
 {
 	/* No config file if LVM_SYSTEM_DIR is empty */
@@ -608,7 +823,7 @@ static int _init_lvm_conf(struct cmd_context *cmd)
 		return 1;
 	}

-	if (!_load_config_file(cmd, ""))
+	if (!_load_config_file(cmd, "", 0))
 		return_0;

 	return 1;
@@ -621,7 +836,7 @@ static int _init_tag_configs(struct cmd_context *cmd)

 	/* Tag list may grow while inside this loop */
 	dm_list_iterate_items(sl, &cmd->tags) {
-		if (!_load_config_file(cmd, sl->str))
+		if (!_load_config_file(cmd, sl->str, 0))
 			return_0;
 	}

@@ -768,15 +983,9 @@ static int _init_dev_cache(struct cmd_context *cmd)

 	init_obtain_device_list_from_udev(device_list_from_udev);

-	if (!(cn = find_config_tree_node(cmd, devices_scan_CFG, NULL))) {
-		if (!dev_cache_add_dir("/dev")) {
-			log_error("Failed to add /dev to internal "
-				  "device cache");
-			return 0;
-		}
-		log_verbose("device/scan not in config file: "
-			    "Defaulting to /dev");
-		return 1;
+	if (!(cn = find_config_tree_array(cmd, devices_scan_CFG, NULL))) {
+		log_error(INTERNAL_ERROR "Unable to find configuration for devices/scan.");
+		return_0;
 	}

 	for (cv = cn->v; cv; cv = cv->next) {
@@ -814,7 +1023,7 @@ static int _init_dev_cache(struct cmd_context *cmd)
 		}
 	}

-	if (!(cn = find_config_tree_node(cmd, devices_loopfiles_CFG, NULL)))
+	if (!(cn = find_config_tree_array(cmd, devices_loopfiles_CFG, NULL)))
 		return 1;

 	for (cv = cn->v; cv; cv = cv->next) {
@@ -835,9 +1044,9 @@ static int _init_dev_cache(struct cmd_context *cmd)
 	return 1;
 }

-#define MAX_FILTERS 6
+#define MAX_FILTERS 8

-static struct dev_filter *_init_filter_components(struct cmd_context *cmd)
+static struct dev_filter *_init_lvmetad_filter_chain(struct cmd_context *cmd)
 {
 	int nr_filt = 0;
 	const struct dm_config_node *cn;
@@ -861,14 +1070,13 @@ static struct dev_filter *_init_filter_components(struct cmd_context *cmd)
 	}

 	/* regex filter. Optional. */
-	if (!(cn = find_config_tree_node(cmd, devices_filter_CFG, NULL)))
-		log_very_verbose("devices/filter not found in config file: "
-				 "no regex filter installed");
-	else if (!(filters[nr_filt] = regex_filter_create(cn->v))) {
-		log_error("Failed to create regex device filter");
-		goto bad;
-	} else
+	if ((cn = find_config_tree_node(cmd, devices_global_filter_CFG, NULL))) {
+		if (!(filters[nr_filt] = regex_filter_create(cn->v))) {
+			log_error("Failed to create global regex device filter");
+			goto bad;
+		}
 		nr_filt++;
+	}

 	/* device type filter. Required. */
 	if (!(filters[nr_filt] = lvm_type_filter_create(cmd->dev_types))) {
@@ -877,6 +1085,15 @@ static struct dev_filter *_init_filter_components(struct cmd_context *cmd)
 	}
 	nr_filt++;

+	/* usable device filter. Required. */
+	if (!(filters[nr_filt] = usable_filter_create(cmd->dev_types,
+						      lvmetad_used() ? FILTER_MODE_PRE_LVMETAD
+								     : FILTER_MODE_NO_LVMETAD))) {
+		log_error("Failed to create usabled device filter");
+		goto bad;
+	}
+	nr_filt++;
+
 	/* mpath component filter. Optional, non-critical. */
 	if (find_config_tree_bool(cmd, devices_multipath_component_detection_CFG, NULL)) {
 		if ((filters[nr_filt] = mpath_filter_create(cmd->dev_types)))
@@ -897,7 +1114,14 @@ static struct dev_filter *_init_filter_components(struct cmd_context *cmd)
 			nr_filt++;
 	}

-	if (!(composite = composite_filter_create(nr_filt, filters)))
+	/* firmware raid filter. Optional, non-critical. */
+	if (find_config_tree_bool(cmd, devices_fw_raid_component_detection_CFG, NULL)) {
+		init_fwraid_filtering(1);
+		if ((filters[nr_filt] = fwraid_filter_create(cmd->dev_types)))
+			nr_filt++;
+	}
+
+	if (!(composite = composite_filter_create(nr_filt, 1, filters)))
 		goto_bad;

 	return composite;
@@ -909,29 +1133,104 @@ bad:
 	return NULL;
 }

-static int _init_filters(struct cmd_context *cmd, unsigned load_persistent_cache)
+/*
+ * The way the filtering is initialized depends on whether lvmetad is used or not.
+ *
+ * If lvmetad is used, there are three filter chains:
+ *
+ *   - cmd->lvmetad_filter - the lvmetad filter chain used when scanning devs for lvmetad update:
+ *     sysfs filter -> global regex filter -> type filter ->
+ *     usable device filter(FILTER_MODE_PRE_LVMETAD) ->
+ *     mpath component filter -> partitioned filter ->
+ *     md component filter -> fw raid filter
+ *
+ *   - cmd->filter - the filter chain used for lvmetad responses:
+ *     persistent filter -> usable device filter(FILTER_MODE_POST_LVMETAD) ->
+ *     regex filter
+ *
+ *   - cmd->full_filter - the filter chain used for all the remaining situations:
+ *     lvmetad_filter -> filter
+ *
+ * If lvmetad is not used, there's just one filter chain:
+ *
+ *   - cmd->filter == cmd->full_filter:
+ *     persistent filter -> regex filter -> sysfs filter ->
+ *     global regex filter -> type filter ->
+ *     usable device filter(FILTER_MODE_NO_LVMETAD) ->
+ *     mpath component filter -> partitioned filter ->
+ *     md component filter -> fw raid filter
+ *
+ */
+int init_filters(struct cmd_context *cmd, unsigned load_persistent_cache)
 {
 	const char *dev_cache;
-	struct dev_filter *f3 = NULL, *f4 = NULL, *toplevel_components[2] = { 0 };
+	struct dev_filter *filter = NULL, *filter_components[2] = {0};
 	struct stat st;
 	const struct dm_config_node *cn;
+	struct timespec ts, cts;
+
+	if (!cmd->initialized.connections) {
+		log_error(INTERNAL_ERROR "connections must be initialized before filters");
+		return 0;
+	}

 	cmd->dump_filter = 0;

-	if (!(f3 = _init_filter_components(cmd)))
+	cmd->lvmetad_filter = _init_lvmetad_filter_chain(cmd);
+	if (!cmd->lvmetad_filter)
 		goto_bad;

 	init_ignore_suspended_devices(find_config_tree_bool(cmd, devices_ignore_suspended_devices_CFG, NULL));
 	init_ignore_lvm_mirrors(find_config_tree_bool(cmd, devices_ignore_lvm_mirrors_CFG, NULL));

+	/*
+	 * If lvmetad is used, there's a separation between pre-lvmetad filter chain
+	 * ("cmd->lvmetad_filter") applied only if scanning for lvmetad update and
+	 * post-lvmetad filter chain ("filter") applied on each lvmetad response.
+	 * However, if lvmetad is not used, these two chains are not separated
+	 * and we use exactly one filter chain during device scanning ("filter"
+	 * that includes also "cmd->lvmetad_filter" chain).
+	 */
+	/* filter component 0 */
+	if (lvmetad_used()) {
+		if (!(filter_components[0] = usable_filter_create(cmd->dev_types, FILTER_MODE_POST_LVMETAD))) {
+			log_verbose("Failed to create usable device filter.");
+			goto bad;
+		}
+	} else {
+		filter_components[0] = cmd->lvmetad_filter;
+		cmd->lvmetad_filter = NULL;
+	}
+
+	/* filter component 1 */
+	if ((cn = find_config_tree_array(cmd, devices_filter_CFG, NULL))) {
+		if (!(filter_components[1] = regex_filter_create(cn->v)))
+			goto_bad;
+		/* we have two filter components - create composite filter */
+		if (!(filter = composite_filter_create(2, 0, filter_components)))
+			goto_bad;
+	} else
+		/* we have only one filter component - no need to create composite filter */
+		filter = filter_components[0];
+
 	if (!(dev_cache = find_config_tree_str(cmd, devices_cache_CFG, NULL)))
 		goto_bad;

-	if (!(f4 = persistent_filter_create(cmd->dev_types, f3, dev_cache))) {
+	if (!(filter = persistent_filter_create(cmd->dev_types, filter, dev_cache))) {
 		log_verbose("Failed to create persistent device filter.");
 		goto bad;
 	}

+	cmd->filter = filter;
+
+	if (lvmetad_used()) {
+		filter_components[0] = cmd->lvmetad_filter;
+		filter_components[1] = cmd->filter;
+		if (!(cmd->full_filter = composite_filter_create(2, 0, filter_components)))
+			goto_bad;
+	} else
+		cmd->full_filter = filter;
+
 	/* Should we ever dump persistent filter state? */
 	if (find_config_tree_bool(cmd, devices_write_cache_state_CFG, NULL))
 		cmd->dump_filter = 1;
@@ -946,31 +1245,40 @@ static int _init_filters(struct cmd_context *cmd, unsigned load_persistent_cache
 	 */
 	if (!find_config_tree_bool(cmd, global_use_lvmetad_CFG, NULL) &&
 	    load_persistent_cache && !cmd->is_long_lived &&
-	    !stat(dev_cache, &st) &&
-	    (st.st_ctime > config_file_timestamp(cmd->cft)) &&
-	    !persistent_filter_load(f4, NULL))
-		log_verbose("Failed to load existing device cache from %s",
-			    dev_cache);
-
-	if (!(cn = find_config_tree_node(cmd, devices_global_filter_CFG, NULL))) {
-		cmd->filter = f4;
-	} else if (!(cmd->lvmetad_filter = regex_filter_create(cn->v)))
-		goto_bad;
-	else {
-		toplevel_components[0] = cmd->lvmetad_filter;
-		toplevel_components[1] = f4;
-		if (!(cmd->filter = composite_filter_create(2, toplevel_components)))
-			goto_bad;
+	    !stat(dev_cache, &st)) {
+		lvm_stat_ctim(&ts, &st);
+		cts = config_file_timestamp(cmd->cft);
+		if (timespeccmp(&ts, &cts, >) &&
+		    !persistent_filter_load(cmd->filter, NULL))
+			log_verbose("Failed to load existing device cache from %s",
+				    dev_cache);
 	}

+	cmd->initialized.filters = 1;
 	return 1;
 bad:
-	if (f4)
-		f4->destroy(f4);
-	else if (f3)
-		f3->destroy(f3);
-	if (toplevel_components[0])
-		toplevel_components[0]->destroy(toplevel_components[0]);
+	if (!filter) {
+		/*
+		 * composite filter not created - destroy
+		 * each component directly
+		 */
+		if (filter_components[0])
+			filter_components[0]->destroy(filter_components[0]);
+		if (filter_components[1])
+			filter_components[1]->destroy(filter_components[1]);
+	} else {
+		/*
+		 * composite filter created - destroy it - this
+		 * will also destroy any of its components
+		 */
+		filter->destroy(filter);
+	}
+
+	/* if lvmetad is used, the cmd->lvmetad_filter is separate */
+	if (cmd->lvmetad_filter)
+		cmd->lvmetad_filter->destroy(cmd->lvmetad_filter);
+
+	cmd->initialized.filters = 0;
 	return 0;
 }

@@ -1014,7 +1322,7 @@ static int _init_formats(struct cmd_context *cmd)
 #ifdef HAVE_LIBDL
 	/* Load any formats in shared libs if not static */
 	if (!is_static() &&
-	    (cn = find_config_tree_node(cmd, global_format_libraries_CFG, NULL))) {
+	    (cn = find_config_tree_array(cmd, global_format_libraries_CFG, NULL))) {

 		const struct dm_config_value *cv;
 		struct format_type *(*init_format_fn) (struct cmd_context *);
@@ -1093,7 +1401,6 @@ int lvm_register_segtype(struct segtype_library *seglib,
 	struct segment_type *segtype2;

 	segtype->library = seglib->lib;
-	segtype->cmd = seglib->cmd;

 	dm_list_iterate_items(segtype2, &seglib->cmd->segtypes) {
 		if (strcmp(segtype2->name, segtype->name))
@@ -1137,7 +1444,7 @@ static int _init_segtypes(struct cmd_context *cmd)
 		init_striped_segtype,
 		init_zero_segtype,
 		init_error_segtype,
-		init_free_segtype,
+		/* disabled until needed init_free_segtype, */
 #ifdef SNAPSHOT_INTERNAL
 		init_snapshot_segtype,
 #endif
@@ -1181,7 +1488,7 @@ static int _init_segtypes(struct cmd_context *cmd)
 #ifdef HAVE_LIBDL
 	/* Load any formats in shared libs unless static */
 	if (!is_static() &&
-	    (cn = find_config_tree_node(cmd, global_segment_libraries_CFG, NULL))) {
+	    (cn = find_config_tree_array(cmd, global_segment_libraries_CFG, NULL))) {

 		const struct dm_config_value *cv;
 		int (*init_multiple_segtypes_fn) (struct cmd_context *,
@@ -1345,11 +1652,80 @@ static int _reopen_stream(FILE *stream, int fd, const char *mode, const char *na
 	return 1;
 }

+/* (Re)configure the lvmetad client: socket path, filter token and active flag.  Always returns 1. */
+static int _init_lvmetad(struct cmd_context *cmd)
+{
+	const struct dm_config_node *global_filter;
+	const char *socket_path;
+
+	lvmetad_disconnect();
+	/* LVM_LVMETAD_SOCKET, when set, replaces the built-in socket location */
+	if (!(socket_path = getenv("LVM_LVMETAD_SOCKET")))
+		socket_path = DEFAULT_RUN_DIR "/lvmetad.socket";
+	/* TODO?
+		socket_path = find_config_tree_str(cmd, "lvmetad/socket_path",
+						   DEFAULT_RUN_DIR "/lvmetad.socket");
+	*/
+	lvmetad_set_socket(socket_path);
+
+	global_filter = find_config_tree_array(cmd, devices_global_filter_CFG, NULL);
+	lvmetad_set_token(global_filter ? global_filter->v : NULL);
+
+	if (find_config_tree_int(cmd, global_locking_type_CFG, NULL) != 3 ||
+	    !find_config_tree_bool(cmd, global_use_lvmetad_CFG, NULL))
+		lvmetad_set_active(NULL, find_config_tree_bool(cmd, global_use_lvmetad_CFG, NULL));
+	else {
+		log_warn("WARNING: configuration setting use_lvmetad overridden to 0 due to locking_type 3. "
+			 "Clustered environment not supported by lvmetad yet.");
+		lvmetad_set_active(NULL, 0);
+	}
+
+	lvmetad_init(cmd);
+	return 1;
+}
+
+/* (Re)configure the lvmpolld client: socket path and active flag.  Always returns 1. */
+static int _init_lvmpolld(struct cmd_context *cmd)
+{
+	const char *env_socket;
+
+	lvmpolld_disconnect();
+
+	/* LVM_LVMPOLLD_SOCKET, when set, replaces the built-in socket location */
+	env_socket = getenv("LVM_LVMPOLLD_SOCKET");
+	lvmpolld_set_socket(env_socket ? env_socket : DEFAULT_RUN_DIR "/lvmpolld.socket");
+
+	lvmpolld_set_active(find_config_tree_bool(cmd, global_use_lvmpolld_CFG, NULL));
+	return 1;
+}
+
+/*
+ * Initialize the lvmetad and then the lvmpolld connection settings.
+ * cmd->initialized.connections records the outcome, which is also the
+ * return value: 1 on success, 0 (after logging an error) on failure.
+ */
+int init_connections(struct cmd_context *cmd)
+{
+	int ok = 0;
+
+	if (!_init_lvmetad(cmd))
+		log_error("Failed to initialize lvmetad connection.");
+	else if (!_init_lvmpolld(cmd))
+		log_error("Failed to initialize lvmpolld connection.");
+	else
+		ok = 1;
+
+	cmd->initialized.connections = ok;
+	return ok;
+}
+
 /* Entry point */
 struct cmd_context *create_toolcontext(unsigned is_long_lived,
 				       const char *system_dir,
 				       unsigned set_buffering,
-				       unsigned threaded)
+				       unsigned threaded,
+				       unsigned set_connections,
+				       unsigned set_filters)
 {
 	struct cmd_context *cmd;
 	FILE *new_stream;
@@ -1470,6 +1846,10 @@ struct cmd_context *create_toolcontext(unsigned is_long_lived,
 	if (!_init_tags(cmd, cmd->cft))
 		goto_out;

+	/* Load lvmlocal.conf */
+	if (*cmd->system_dir && !_load_config_file(cmd, "", 1))
+		goto_out;
+
 	if (!_init_tag_configs(cmd))
 		goto_out;

@@ -1483,15 +1863,12 @@ struct cmd_context *create_toolcontext(unsigned is_long_lived,
 		goto_out;

 	if (!(cmd->dev_types = create_dev_types(cmd->proc_dir,
-						find_config_tree_node(cmd, devices_types_CFG, NULL))))
+						find_config_tree_array(cmd, devices_types_CFG, NULL))))
 		goto_out;

 	if (!_init_dev_cache(cmd))
 		goto_out;

-	if (!_init_filters(cmd, 1))
-		goto_out;
-
 	memlock_init(cmd);

 	if (!_init_formats(cmd))
@@ -1510,12 +1887,18 @@ struct cmd_context *create_toolcontext(unsigned is_long_lived,

 	_init_globals(cmd);

+	if (set_connections && !init_connections(cmd))
+		goto_out;	/* not return_0: the "out" path destroys the half-built cmd instead of leaking it */
+
+	if (set_filters && !init_filters(cmd, 1))
+		goto_out;
+
 	cmd->default_settings.cache_vgmetadata = 1;
 	cmd->current_settings = cmd->default_settings;

-	cmd->config_initialized = 1;
+	cmd->initialized.config = 1;
 out:
-	if (!cmd->config_initialized) {
+	if (!cmd->initialized.config) {
 		destroy_toolcontext(cmd);
 		cmd = NULL;
 	}
@@ -1581,18 +1964,25 @@ static void _destroy_dev_types(struct cmd_context *cmd)
 	cmd->dev_types = NULL;
 }

+static void _destroy_filters(struct cmd_context *cmd)	/* tear down the filter chains set up by init_filters() */
+{
+	if (cmd->full_filter) {
+		cmd->full_filter->destroy(cmd->full_filter);	/* only the top-level chain is destroyed explicitly */
+		cmd->lvmetad_filter = cmd->filter = cmd->full_filter = NULL;
+	}
+	cmd->initialized.filters = 0;	/* refresh_filters() returns early while this is 0 */
+}
+
 int refresh_filters(struct cmd_context *cmd)
 {
 	int r, saved_ignore_suspended_devices = ignore_suspended_devices();

-	if (cmd->filter) {
-		cmd->filter->destroy(cmd->filter);
-		cmd->filter = NULL;
-	}
+	if (!cmd->initialized.filters)
+		/* if filters not initialized, there's nothing to refresh */
+		return 1;

-	cmd->lvmetad_filter = NULL;
-
-	if (!(r = _init_filters(cmd, 0)))
+	_destroy_filters(cmd);
+	if (!(r = init_filters(cmd, 0)))
                stack;

 	/*
@@ -1621,10 +2011,7 @@ int refresh_toolcontext(struct cmd_context *cmd)
 	label_exit();
 	_destroy_segtypes(&cmd->segtypes);
 	_destroy_formats(cmd, &cmd->formats);
-	if (cmd->filter) {
-		cmd->filter->destroy(cmd->filter);
-		cmd->filter = NULL;
-	}
+
 	if (!dev_cache_exit())
 		stack;
 	_destroy_dev_types(cmd);
@@ -1641,7 +2028,7 @@ int refresh_toolcontext(struct cmd_context *cmd)

 	_destroy_config(cmd);

-	cmd->config_initialized = 0;
+	cmd->initialized.config = 0;

 	cmd->hosttags = 0;

@@ -1674,6 +2061,10 @@ int refresh_toolcontext(struct cmd_context *cmd)
 	if (!_init_tags(cmd, cft_tmp))
 		return_0;

+	/* Load lvmlocal.conf */
+	if (*cmd->system_dir && !_load_config_file(cmd, "", 1))
+		return_0;
+
 	/* Doesn't change cmd->cft */
 	if (!_init_tag_configs(cmd))
 		return_0;
@@ -1694,15 +2085,12 @@ int refresh_toolcontext(struct cmd_context *cmd)
 		return_0;

 	if (!(cmd->dev_types = create_dev_types(cmd->proc_dir,
-						find_config_tree_node(cmd, devices_types_CFG, NULL))))
+						find_config_tree_array(cmd, devices_types_CFG, NULL))))
 		return_0;

 	if (!_init_dev_cache(cmd))
 		return_0;

-	if (!_init_filters(cmd, 0))
-		return_0;
-
 	if (!_init_formats(cmd))
 		return_0;

@@ -1715,7 +2103,13 @@ int refresh_toolcontext(struct cmd_context *cmd)
 	if (!_init_backup(cmd))
 		return_0;

-	cmd->config_initialized = 1;
+	cmd->initialized.config = 1;
+
+	if (cmd->initialized.connections && !init_connections(cmd))
+		return_0;
+
+	if (!refresh_filters(cmd))
+		return_0;

 	reset_lvm_errno(1);
 	return 1;
@@ -1737,8 +2131,7 @@ void destroy_toolcontext(struct cmd_context *cmd)
 	label_exit();
 	_destroy_segtypes(&cmd->segtypes);
 	_destroy_formats(cmd, &cmd->formats);
-	if (cmd->filter)
-		cmd->filter->destroy(cmd->filter);
+	_destroy_filters(cmd);
 	if (cmd->mem)
 		dm_pool_destroy(cmd->mem);
 	dev_cache_exit();
@@ -1785,6 +2178,7 @@ void destroy_toolcontext(struct cmd_context *cmd)

 	lvmetad_release_token();
 	lvmetad_disconnect();
+	lvmpolld_disconnect();

 	release_log_memory();
 	activation_exit();
--- a/lib/commands/toolcontext.h
+++ b/lib/commands/toolcontext.h
@@ -60,28 +60,59 @@ struct config_tree_list {
 	struct dm_config_tree *cft;
 };

+struct cmd_context_initialized_parts {	/* one flag per separately initialized part of cmd_context */
+	unsigned config:1; /* used to reinitialize config if previous init was not successful */
+	unsigned filters:1;	/* set by init_filters() on success; cleared on its failure and by _destroy_filters() */
+	unsigned connections:1;	/* outcome of the last init_connections() call */
+};
+
 /* FIXME Split into tool & library contexts */
 /* command-instance-related variables needed by library */
 struct cmd_context {
-	struct dm_pool *libmem;	/* For permanent config data */
-	struct dm_pool *mem;	/* Transient: Cleared between each command */
+	/*
+	 * Memory handlers.
+	 */
+	struct dm_pool *libmem;			/* for permanent config data */
+	struct dm_pool *mem;			/* transient: cleared between each command */

-	const struct format_type *fmt;	/* Current format to use by default */
-	struct format_type *fmt_backup;	/* Format to use for backups */
-
-	struct dm_list formats;	/* Available formats */
-	struct dm_list segtypes;	/* Available segment types */
-	const char *hostname;
-	const char *kernel_vsn;
-
-	unsigned rand_seed;
-	char *linebuffer;
+	/*
+	 * Command line and arguments.
+	 */
 	const char *cmd_line;
 	struct command *command;
 	char **argv;
 	struct arg_values *arg_values;
 	struct dm_list arg_value_groups;
-	unsigned is_long_lived:1;	/* Optimises persistent_filter handling */
+
+	/*
+	 * Format handlers.
+	 */
+	const struct format_type *fmt;		/* current format to use by default */
+	struct format_type *fmt_backup;		/* format to use for backups */
+	struct dm_list formats;			/* available formats */
+	struct dm_list segtypes;		/* available segment types */
+
+	/*
+	 * Machine and system identification.
+	 */
+	const char *system_id;
+	const char *hostname;
+	const char *kernel_vsn;
+
+	/*
+	 * Device identification.
+	 */
+	struct dev_types *dev_types;		/* recognized extra device types. */
+
+	/*
+	 * Initialization state.
+	 */
+	struct cmd_context_initialized_parts initialized;
+
+	/*
+	 * Switches.
+	 */
+	unsigned is_long_lived:1;		/* optimises persistent_filter handling */
 	unsigned handles_missing_pvs:1;
 	unsigned handles_unknown_segments:1;
 	unsigned use_linear_target:1;
@@ -92,40 +123,71 @@ struct cmd_context {
 	unsigned report_binary_values_as_numeric:1;
 	unsigned metadata_read_only:1;
 	unsigned ignore_clustered_vgs:1;
-	unsigned threaded:1;		/* Set if running within a thread e.g. clvmd */
+	unsigned threaded:1;			/* set if running within a thread e.g. clvmd */
+	unsigned independent_metadata_areas:1;	/* active formats have MDAs outside PVs */
+	unsigned unknown_system_id:1;
+	unsigned include_foreign_vgs:1;		/* report/display cmds can reveal foreign VGs */
+	unsigned include_shared_vgs:1;		/* report/display cmds can reveal lockd VGs */
+	unsigned include_active_foreign_vgs:1;	/* cmd should process foreign VGs with active LVs */
+	unsigned vg_read_print_access_error:1;	/* print access errors from vg_read */
+	unsigned lockd_gl_disable:1;
+	unsigned lockd_vg_disable:1;
+	unsigned lockd_lv_disable:1;
+	unsigned lockd_vg_default_sh:1;
+	unsigned lockd_vg_enforce_sh:1;

-	unsigned independent_metadata_areas:1;	/* Active formats have MDAs outside PVs */
+	/*
+	 * Filtering.
+	 */
+	struct dev_filter *lvmetad_filter;	/* pre-lvmetad filter chain */
+	struct dev_filter *filter;		/* post-lvmetad filter chain */
+	struct dev_filter *full_filter;		/* lvmetad_filter + filter */
+	int dump_filter;			/* Dump filter when exiting? */

-	struct dev_types *dev_types;
-	struct dev_filter *filter;
-	struct dev_filter *lvmetad_filter;
-	int dump_filter;	/* Dump filter when exiting? */
-
-	struct dm_list config_files; /* master lvm config + any existing tag configs */
-	struct profile_params *profile_params; /* profile handling params including loaded profile configs */
-	struct dm_config_tree *cft; /* the whole cascade: CONFIG_STRING -> CONFIG_PROFILE -> CONFIG_FILE/CONFIG_MERGED_FILES */
-	int config_initialized; /* used to reinitialize config if previous init was not successful */
-
-	struct dm_hash_table *cft_def_hash; /* config definition hash used for validity check (item type + item recognized) */
-
-	/* selected settings with original default/configured value which can be changed during cmd processing */
-	struct config_info default_settings;
-	/* may contain changed values compared to default_settings */
-	struct config_info current_settings;
+	/*
+	 * Configuration.
+	 */
+	struct dm_list config_files; 		/* master lvm config + any existing tag configs */
+	struct profile_params *profile_params;	/* profile handling params including loaded profile configs */
+	struct dm_config_tree *cft;		/* the whole cascade: CONFIG_STRING -> CONFIG_PROFILE -> CONFIG_FILE/CONFIG_MERGED_FILES */
+	struct dm_hash_table *cft_def_hash;	/* config definition hash used for validity check (item type + item recognized) */
+	struct config_info default_settings;	/* selected settings with original default/configured value which can be changed during cmd processing */
+	struct config_info current_settings; 	/* may contain changed values compared to default_settings */

+	/*
+	 * Archives and backups.
+	 */
 	struct archive_params *archive_params;
 	struct backup_params *backup_params;
 	const char *stripe_filler;

-	/* List of defined tags */
-	struct dm_list tags;
-	const char *report_list_item_separator;
+	/*
+	 * Host tags.
+	 */
+	struct dm_list tags;			/* list of defined tags */
 	int hosttags;

-	const char *lib_dir;		/* Cache value global/library_dir */
+	/*
+	 * Paths.
+	 */
+	const char *lib_dir;			/* cache value global/library_dir */
 	char system_dir[PATH_MAX];
 	char dev_dir[PATH_MAX];
 	char proc_dir[PATH_MAX];
+
+	/*
+	 * Buffers.
+	 */
+	char display_buffer[NAME_LEN * 10];	/* ring buffer for upto 10 longest vg/lv names */
+	unsigned display_lvname_idx;		/* index to ring buffer */
+	char *linebuffer;
+
+	/*
+	 * Others - unsorted.
+	 */
+	const char *report_list_item_separator;
+	const char *time_format;
+	unsigned rand_seed;
 };

 /*
@@ -135,14 +197,20 @@ struct cmd_context {
 struct cmd_context *create_toolcontext(unsigned is_long_lived,
 				       const char *system_dir,
 				       unsigned set_buffering,
-				       unsigned threaded);
+				       unsigned threaded,
+				       unsigned set_connections,
+				       unsigned set_filters);
 void destroy_toolcontext(struct cmd_context *cmd);
 int refresh_toolcontext(struct cmd_context *cmd);
 int refresh_filters(struct cmd_context *cmd);
 int process_profilable_config(struct cmd_context *cmd);
 int config_files_changed(struct cmd_context *cmd);
 int init_lvmcache_orphans(struct cmd_context *cmd);
+int init_filters(struct cmd_context *cmd, unsigned load_persistent_cache);
+int init_connections(struct cmd_context *cmd);

 struct format_type *get_format_by_name(struct cmd_context *cmd, const char *format);

+const char *system_id_from_string(struct cmd_context *cmd, const char *str);
+
 #endif
--- a/lib/config/config.c
+++ b/lib/config/config.c
@@ -53,7 +53,7 @@ struct config_file {

 struct config_source {
 	config_source_t type;
-	time_t timestamp;
+	struct timespec timestamp;
 	union {
 		struct config_file *file;
 		struct config_file *profile;
@@ -65,11 +65,11 @@ struct config_source {
 * Map each ID to respective definition of the configuration item.
 */
 static struct cfg_def_item _cfg_def_items[CFG_COUNT + 1] = {
-#define cfg_section(id, name, parent, flags, since_version, comment) {id, parent, name, CFG_TYPE_SECTION, {0}, flags, since_version, comment},
-#define cfg(id, name, parent, flags, type, default_value, since_version, comment) {id, parent, name, type, {.v_##type = default_value}, flags, since_version, comment},
-#define cfg_runtime(id, name, parent, flags, type, since_version, comment) {id, parent, name, type, {.fn_##type = get_default_##id}, flags | CFG_DEFAULT_RUN_TIME, since_version, comment},
-#define cfg_array(id, name, parent, flags, types, default_value, since_version, comment) {id, parent, name, CFG_TYPE_ARRAY | types, {.v_CFG_TYPE_STRING = default_value}, flags, since_version, comment},
-#define cfg_array_runtime(id, name, parent, flags, types, since_version, comment) {id, parent, name, CFG_TYPE_ARRAY | types, {.fn_CFG_TYPE_STRING = get_default_##id}, flags | CFG_DEFAULT_RUN_TIME, since_version, comment},
+#define cfg_section(id, name, parent, flags, since_version, deprecated_since_version, deprecation_comment, comment) {id, parent, name, CFG_TYPE_SECTION, {0}, flags, since_version, {0}, deprecated_since_version, deprecation_comment, comment},
+#define cfg(id, name, parent, flags, type, default_value, since_version, unconfigured_value, deprecated_since_version, deprecation_comment, comment) {id, parent, name, type, {.v_##type = default_value}, flags, since_version, {.v_UNCONFIGURED = unconfigured_value}, deprecated_since_version, deprecation_comment, comment},
+#define cfg_runtime(id, name, parent, flags, type, since_version, deprecated_since_version, deprecation_comment, comment) {id, parent, name, type, {.fn_##type = get_default_##id}, flags | CFG_DEFAULT_RUN_TIME, since_version, {.fn_UNCONFIGURED = get_default_unconfigured_##id}, deprecated_since_version, deprecation_comment, comment},
+#define cfg_array(id, name, parent, flags, types, default_value, since_version, unconfigured_value, deprecated_since_version, deprecation_comment, comment) {id, parent, name, CFG_TYPE_ARRAY | types, {.v_CFG_TYPE_STRING = default_value}, flags, since_version, {.v_UNCONFIGURED = unconfigured_value}, deprecated_since_version, deprecation_comment, comment},
+#define cfg_array_runtime(id, name, parent, flags, types, since_version, deprecated_since_version, deprecation_comment, comment) {id, parent, name, CFG_TYPE_ARRAY | types, {.fn_CFG_TYPE_STRING = get_default_##id}, flags | CFG_DEFAULT_RUN_TIME, since_version, {.fn_UNCONFIGURED = get_default_unconfigured_##id}, deprecated_since_version, deprecation_comment, comment},
 #include "config_settings.h"
 #undef cfg_section
 #undef cfg
@@ -173,7 +173,7 @@ int config_file_check(struct dm_config_tree *cft, const char **filename, struct
 		return 0;
 	}

-	cs->timestamp = info->st_ctime;
+	lvm_stat_ctim(&cs->timestamp, info);
 	cf->exists = 1;
 	cf->st_size = info->st_size;

@@ -193,6 +193,7 @@ int config_file_changed(struct dm_config_tree *cft)
 	struct config_source *cs = dm_config_get_custom(cft);
 	struct config_file *cf;
 	struct stat info;
+	struct timespec ts;

 	if (cs->type != CONFIG_FILE) {
 		log_error(INTERNAL_ERROR "config_file_changed: expected file config source, "
@@ -226,7 +227,9 @@ int config_file_changed(struct dm_config_tree *cft)
 	}

 	/* Unchanged? */
-	if (cs->timestamp == info.st_ctime && cf->st_size == info.st_size)
+	lvm_stat_ctim(&ts, &info);
+	if ((timespeccmp(&cs->timestamp, &ts, ==)) &&
+	    cf->st_size == info.st_size)
 		return 0;

      reload:
@@ -478,9 +481,15 @@ int override_config_tree_from_profile(struct cmd_context *cmd,
 	return 0;
 }

+/*
+ * When checksum_only is set, only the checksum of the buffer is verified.
+ * The mda is not parsed into the config tree, which therefore remains
+ * unmodified and should not be used.
+ */
 int config_file_read_fd(struct dm_config_tree *cft, struct device *dev,
 			off_t offset, size_t size, off_t offset2, size_t size2,
-			checksum_fn_t checksum_fn, uint32_t checksum)
+			checksum_fn_t checksum_fn, uint32_t checksum,
+			int checksum_only)
 {
 	char *fb, *fe;
 	int r = 0;
@@ -529,9 +538,11 @@ int config_file_read_fd(struct dm_config_tree *cft, struct device *dev,
 		goto out;
 	}

-	fe = fb + size + size2;
-	if (!dm_config_parse(cft, fb, fe))
-		goto_out;
+	if (!checksum_only) {
+		fe = fb + size + size2;
+		if (!dm_config_parse(cft, fb, fe))
+			goto_out;
+	}

 	r = 1;

@@ -575,7 +586,7 @@ int config_file_read(struct dm_config_tree *cft)
 	}

 	r = config_file_read_fd(cft, cf->dev, 0, (size_t) info.st_size, 0, 0,
-				(checksum_fn_t) NULL, 0);
+				(checksum_fn_t) NULL, 0, 0);

 	if (!cf->keep_open) {
 		if (!dev_close(cf->dev))
@@ -586,13 +597,14 @@ int config_file_read(struct dm_config_tree *cft)
 	return r;
 }

-time_t config_file_timestamp(struct dm_config_tree *cft)
+struct timespec config_file_timestamp(struct dm_config_tree *cft)
 {
 	struct config_source *cs = dm_config_get_custom(cft);
 	return cs->timestamp;
 }

 #define cfg_def_get_item_p(id) (&_cfg_def_items[id])
+#define cfg_def_get_default_unconfigured_value_hint(cmd,item) ((item->flags & CFG_DEFAULT_RUN_TIME) ? item->default_unconfigured_value.fn_UNCONFIGURED(cmd) : item->default_unconfigured_value.v_UNCONFIGURED)
 #define cfg_def_get_default_value_hint(cmd,item,type,profile) ((item->flags & CFG_DEFAULT_RUN_TIME) ? item->default_value.fn_##type(cmd,profile) : item->default_value.v_##type)
 #define cfg_def_get_default_value(cmd,item,type,profile) (item->flags & CFG_DEFAULT_UNDEFINED ? 0 : cfg_def_get_default_value_hint(cmd,item,type,profile))

@@ -649,27 +661,33 @@ static void _log_type_error(const char *path, cfg_def_type_t actual,
 	_get_type_name(actual_type_name, sizeof(actual_type_name), actual);
 	_get_type_name(expected_type_name, sizeof(expected_type_name), expected);

-	log_warn_suppress(suppress_messages, "Configuration setting \"%s\" has invalid type. "
-					     "Found%s, expected%s.", path,
+	log_warn_suppress(suppress_messages, "WARNING: Configuration setting \"%s\" has invalid type. "
+					     "Found%s but expected%s.", path,
 					     actual_type_name, expected_type_name);
 }

-static struct dm_config_value *_get_def_array_values(struct dm_config_tree *cft,
-						     const cfg_def_item_t *def)
+static struct dm_config_value *_get_def_array_values(struct cmd_context *cmd,
+						     struct dm_config_tree *cft,
+						     const cfg_def_item_t *def,
+						     uint32_t format_flags)
 {
+	const char *def_enc_value;
 	char *enc_value, *token, *p, *r;
 	struct dm_config_value *array = NULL, *v = NULL, *oldv = NULL;

-	if (!def->default_value.v_CFG_TYPE_STRING) {
+	def_enc_value = cfg_def_get_default_value(cmd, def, CFG_TYPE_ARRAY, NULL);
+
+	if (!def_enc_value) {
 		if (!(array = dm_config_create_value(cft))) {
 			log_error("Failed to create default empty array for %s.", def->name);
 			return NULL;
 		}
 		array->type = DM_CFG_EMPTY_ARRAY;
+		dm_config_value_set_format_flags(array, format_flags);
 		return array;
 	}

-	if (!(p = token = enc_value = dm_strdup(def->default_value.v_CFG_TYPE_STRING))) {
+	if (!(p = token = enc_value = dm_strdup(def_enc_value))) {
 		log_error("_get_def_array_values: dm_strdup failed");
 		return NULL;
 	}
@@ -698,6 +716,9 @@ static struct dm_config_value *_get_def_array_values(struct dm_config_tree *cft,
 			dm_free(enc_value);
 			return NULL;
 		}
+
+		dm_config_value_set_format_flags(v, format_flags);
+
 		if (oldv)
 			oldv->next = v;
 		if (!array)
@@ -790,6 +811,11 @@ static int _config_def_check_node_single_value(struct cft_check_handle *handle,
 			} else if  (!(def->type & CFG_TYPE_STRING)) {
 				_log_type_error(rp, CFG_TYPE_STRING, def->type, handle->suppress_messages);
 				return 0;
+			} else if (!(def->flags & CFG_ALLOW_EMPTY) && !*v->v.str) {
+				log_warn_suppress(handle->suppress_messages,
+						  "Configuration setting \"%s\" invalid. "
+						  "It cannot be set to an empty value.", rp);
+				return 0;
 			}
 			break;
 		default: ;
@@ -809,6 +835,12 @@ static int _check_value_differs_from_default(struct cft_check_handle *handle,
 	float f;
 	const char *str;

+	if ((handle->ignoreunsupported && (def->flags & CFG_UNSUPPORTED)) ||
+	    (handle->ignoreadvanced && (def->flags & CFG_ADVANCED))) {
+		diff = 0;
+		goto out;
+	}
+
 	/* if default value is undefined, the value used differs from default */
 	if (def->flags & CFG_DEFAULT_UNDEFINED) {
 		diff = 1;
@@ -816,7 +848,7 @@ static int _check_value_differs_from_default(struct cft_check_handle *handle,
 	}

 	if (!v_def && (def->type & CFG_TYPE_ARRAY)) {
-		if (!(v_def_array = v_def_iter = _get_def_array_values(handle->cft, def)))
+		if (!(v_def_array = v_def_iter = _get_def_array_values(handle->cmd, handle->cft, def, 0)))
 			return_0;
 		do {
 			/* iterate over each element of the array and check its value */
@@ -1008,9 +1040,14 @@ static int _config_def_check_tree(struct cft_check_handle *handle,
 				  size_t buf_size, struct dm_config_node *root)
 {
 	struct dm_config_node *cn;
+	cfg_def_item_t *def;
 	int valid, r = 1;
 	size_t len;

+	def = cfg_def_get_item_p(root->id);
+	if (def->flags & CFG_SECTION_NO_CHECK)
+		return 1;
+
 	for (cn = root->child; cn; cn = cn->sib) {
 		if ((valid = _config_def_check_node(handle, vp, pvp, rp, prp,
 						    buf_size, cn)) && !cn->v) {
@@ -1140,6 +1177,29 @@ static int _apply_local_profile(struct cmd_context *cmd, struct profile *profile
 	return override_config_tree_from_profile(cmd, profile);
 }

+/* Return 1 (after warning) when 'item' is flagged CFG_DISABLED yet 'path' exists in cmd->cft; 0 otherwise. */
+static int _config_disabled(struct cmd_context *cmd, cfg_def_item_t *item, const char *path)
+{
+	if (!(item->flags & CFG_DISABLED) || !dm_config_tree_find_node(cmd->cft, path))
+		return 0;
+
+	log_warn("WARNING: Configuration setting %s is disabled. Using default value.", path);
+	return 1;
+}
+
+/*
+ * Build the config path of the setting identified by 'id' and look that
+ * path up in 'cft'.  'cmd' is not used by this function.
+ */
+const struct dm_config_node *find_config_node(struct cmd_context *cmd, struct dm_config_tree *cft, int id)
+{
+	char path[CFG_PATH_MAX_LEN];
+	cfg_def_item_t *def = cfg_def_get_item_p(id);
+
+	_cfg_def_make_path(path, sizeof(path), def->id, def, 0);
+	return dm_config_tree_find_node(cft, path);
+}
+
 const struct dm_config_node *find_config_tree_node(struct cmd_context *cmd, int id, struct profile *profile)
 {
 	cfg_def_item_t *item = cfg_def_get_item_p(id);
@@ -1171,7 +1231,8 @@ const char *find_config_tree_str(struct cmd_context *cmd, int id, struct profile
 	if (item->type != CFG_TYPE_STRING)
 		log_error(INTERNAL_ERROR "%s cfg tree element not declared as string.", path);

-	str = dm_config_tree_find_str(cmd->cft, path, cfg_def_get_default_value(cmd, item, CFG_TYPE_STRING, profile));
+	str = _config_disabled(cmd, item, path) ? cfg_def_get_default_value(cmd, item, CFG_TYPE_STRING, profile)
+						: dm_config_tree_find_str(cmd->cft, path, cfg_def_get_default_value(cmd, item, CFG_TYPE_STRING, profile));

 	if (profile_applied)
 		remove_config_tree_by_source(cmd, profile->source);
@@ -1194,7 +1255,8 @@ const char *find_config_tree_str_allow_empty(struct cmd_context *cmd, int id, st
 	if (!(item->flags & CFG_ALLOW_EMPTY))
 		log_error(INTERNAL_ERROR "%s cfg tree element not declared to allow empty values.", path);

-	str = dm_config_tree_find_str_allow_empty(cmd->cft, path, cfg_def_get_default_value(cmd, item, CFG_TYPE_STRING, profile));
+	str = _config_disabled(cmd, item, path) ? cfg_def_get_default_value(cmd, item, CFG_TYPE_STRING, profile)
+						: dm_config_tree_find_str_allow_empty(cmd->cft, path, cfg_def_get_default_value(cmd, item, CFG_TYPE_STRING, profile));

 	if (profile_applied)
 		remove_config_tree_by_source(cmd, profile->source);
@@ -1215,7 +1277,8 @@ int find_config_tree_int(struct cmd_context *cmd, int id, struct profile *profil
 	if (item->type != CFG_TYPE_INT)
 		log_error(INTERNAL_ERROR "%s cfg tree element not declared as integer.", path);

-	i = dm_config_tree_find_int(cmd->cft, path, cfg_def_get_default_value(cmd, item, CFG_TYPE_INT, profile));
+	i = _config_disabled(cmd, item, path) ? cfg_def_get_default_value(cmd, item, CFG_TYPE_INT, profile)
+					      : dm_config_tree_find_int(cmd->cft, path, cfg_def_get_default_value(cmd, item, CFG_TYPE_INT, profile));

 	if (profile_applied)
 		remove_config_tree_by_source(cmd, profile->source);
@@ -1236,7 +1299,8 @@ int64_t find_config_tree_int64(struct cmd_context *cmd, int id, struct profile *
 	if (item->type != CFG_TYPE_INT)
 		log_error(INTERNAL_ERROR "%s cfg tree element not declared as integer.", path);

-	i64 = dm_config_tree_find_int64(cmd->cft, path, cfg_def_get_default_value(cmd, item, CFG_TYPE_INT, profile));
+	i64 = _config_disabled(cmd, item, path) ? cfg_def_get_default_value(cmd, item, CFG_TYPE_INT, profile)
+						: dm_config_tree_find_int64(cmd->cft, path, cfg_def_get_default_value(cmd, item, CFG_TYPE_INT, profile));

 	if (profile_applied)
 		remove_config_tree_by_source(cmd, profile->source);
@@ -1257,7 +1321,8 @@ float find_config_tree_float(struct cmd_context *cmd, int id, struct profile *pr
 	if (item->type != CFG_TYPE_FLOAT)
 		log_error(INTERNAL_ERROR "%s cfg tree element not declared as float.", path);

-	f = dm_config_tree_find_float(cmd->cft, path, cfg_def_get_default_value(cmd, item, CFG_TYPE_FLOAT, profile));
+	f = _config_disabled(cmd, item, path) ? cfg_def_get_default_value(cmd, item, CFG_TYPE_FLOAT, profile)
+					      : dm_config_tree_find_float(cmd->cft, path, cfg_def_get_default_value(cmd, item, CFG_TYPE_FLOAT, profile));

 	if (profile_applied)
 		remove_config_tree_by_source(cmd, profile->source);
@@ -1265,6 +1330,23 @@ float find_config_tree_float(struct cmd_context *cmd, int id, struct profile *pr
 	return f;
 }

+/*
+ * Boolean lookup of setting 'id' in the explicitly supplied tree 'cft'.
+ * If _config_disabled() reports the setting as disabled, its default is returned.
+ */
+int find_config_bool(struct cmd_context *cmd, struct dm_config_tree *cft, int id)
+{
+	cfg_def_item_t *def = cfg_def_get_item_p(id);
+	char path[CFG_PATH_MAX_LEN];
+
+	_cfg_def_make_path(path, sizeof(path), def->id, def, 0);
+	if (def->type != CFG_TYPE_BOOL)
+		log_error(INTERNAL_ERROR "%s cfg tree element not declared as boolean.", path);
+	if (_config_disabled(cmd, def, path))
+		return cfg_def_get_default_value(cmd, def, CFG_TYPE_BOOL, NULL);
+	return dm_config_tree_find_bool(cft, path, cfg_def_get_default_value(cmd, def, CFG_TYPE_BOOL, NULL));
+}
+
 int find_config_tree_bool(struct cmd_context *cmd, int id, struct profile *profile)
 {
 	cfg_def_item_t *item = cfg_def_get_item_p(id);
@@ -1278,7 +1360,8 @@ int find_config_tree_bool(struct cmd_context *cmd, int id, struct profile *profi
 	if (item->type != CFG_TYPE_BOOL)
 		log_error(INTERNAL_ERROR "%s cfg tree element not declared as boolean.", path);

-	b = dm_config_tree_find_bool(cmd->cft, path, cfg_def_get_default_value(cmd, item, CFG_TYPE_BOOL, profile));
+	b = _config_disabled(cmd, item, path) ? cfg_def_get_default_value(cmd, item, CFG_TYPE_BOOL, profile)
+					      : dm_config_tree_find_bool(cmd->cft, path, cfg_def_get_default_value(cmd, item, CFG_TYPE_BOOL, profile));

 	if (profile_applied)
 		remove_config_tree_by_source(cmd, profile->source);
@@ -1286,6 +1369,106 @@ int find_config_tree_bool(struct cmd_context *cmd, int id, struct profile *profi
 	return b;
 }

+/* Build a cmd->cft node carrying the default array of 'def'; NULL if the default is undefined or on failure. */
+static struct dm_config_node *_get_array_def_node(struct cmd_context *cmd,
+						  cfg_def_item_t *def,
+						  struct profile *profile)
+{
+	struct dm_config_node *node;
+
+	if (def->flags & CFG_DEFAULT_UNDEFINED)
+		return NULL;
+	node = dm_config_create_node(cmd->cft, def->name);
+	if (!node) {
+		log_error("Failed to create default array node for %s.", def->name);
+		return NULL;
+	}
+	node->v = _get_def_array_values(cmd, cmd->cft, def, 0);
+	if (node->v)
+		return node;
+
+	dm_pool_free(cmd->cft->mem, node);
+	return_NULL;
+}
+
+struct _config_array_out_handle {
+	struct dm_pool *mem;	/* pool that _config_array_line() duplicates the line into */
+	char *str;		/* copy of the line most recently passed to _config_array_line() */
+};
+
+/* Output callback: keep a pool-allocated copy of 'line' in the handle passed as 'baton'. */
+static int _config_array_line(const struct dm_config_node *cn, const char *line, void *baton)
+{
+	struct _config_array_out_handle *out = baton;
+
+	out->str = dm_pool_strdup(out->mem, line);
+	if (out->str)
+		return 1;
+	log_error("_config_array_line: dm_pool_strdup failed");
+	return 0;
+}
+
+/*
+ * Log at very-verbose level the array value held by 'cn' for setting 'path'.
+ * Temporary format flags are set for rendering and restored before returning.
+ */
+static void _log_array_value_used(struct dm_pool *mem, const struct dm_config_node *cn,
+				  const char *path, int default_used)
+{
+	const uint32_t fmt = DM_CONFIG_VALUE_FMT_COMMON_EXTRA_SPACES | DM_CONFIG_VALUE_FMT_COMMON_ARRAY;
+	struct dm_config_node_out_spec spec = { 0 };
+	struct _config_array_out_handle capture = { 0 };
+	uint32_t saved_flags = dm_config_value_get_format_flags(cn->v);
+	const char *shown;
+
+	capture.mem = mem;
+	spec.line_fn = _config_array_line;
+	dm_config_value_set_format_flags(cn->v, fmt);
+	if (!dm_config_write_one_node_out(cn, &spec, &capture)) {
+		log_error("_log_array_value_used: failed to write node value");
+		capture.mem = NULL;	/* marks the captured string as unusable */
+	}
+
+	shown = capture.mem ? capture.str : "<unknown>";
+	if (default_used)
+		log_very_verbose("%s not found in config: defaulting to %s", path, shown);
+	else
+		log_very_verbose("Setting %s to %s", path, shown);
+
+	if (capture.mem)
+		dm_pool_free(capture.mem, capture.str);
+	dm_config_value_set_format_flags(cn->v, saved_flags);
+}
+
+/*
+ * Array lookup of setting 'id'.  Falls back to the default array node when
+ * the setting is disabled or not found; the node used, if any, is logged.
+ * A profile applied by _apply_local_profile() is removed again before returning.
+ */
+const struct dm_config_node *find_config_tree_array(struct cmd_context *cmd, int id, struct profile *profile)
+{
+	cfg_def_item_t *def = cfg_def_get_item_p(id);
+	const struct dm_config_node *found = NULL;
+	char path[CFG_PATH_MAX_LEN];
+	int profile_applied = _apply_local_profile(cmd, profile);
+
+	_cfg_def_make_path(path, sizeof(path), def->id, def, 0);
+	if (!(def->type & CFG_TYPE_ARRAY))
+		log_error(INTERNAL_ERROR "%s cfg tree element not declared as array.", path);
+
+	if (!_config_disabled(cmd, def, path))
+		found = find_config_tree_node(cmd, id, profile);
+
+	if (found)
+		_log_array_value_used(cmd->cft->mem, found, path, 0);
+	else if ((found = _get_array_def_node(cmd, def, profile)))
+		_log_array_value_used(cmd->cft->mem, found, path, 1);
+
+	if (profile_applied)
+		remove_config_tree_by_source(cmd, profile->source);
+	return found;
+}
+
 /* Insert cn2 after cn1 */
 static void _insert_config_node(struct dm_config_node **cn1,
 				struct dm_config_node *cn2)
@@ -1414,7 +1597,7 @@ int merge_config_tree(struct cmd_context *cmd, struct dm_config_tree *cft,
 	cs = dm_config_get_custom(cft);
 	csn = dm_config_get_custom(newdata);

-	if (cs && csn && (cs->timestamp < csn->timestamp))
+	if (cs && csn && timespeccmp(&cs->timestamp, &csn->timestamp, <))
 		cs->timestamp = csn->timestamp;

 	return 1;
@@ -1426,6 +1609,55 @@ struct out_baton {
 	struct dm_pool *mem;
 };

+#define MAX_COMMENT_LINE 512
+
+/*
+ * Copy the next line of 'comment', starting at offset *pos, into 'line',
+ * which must hold MAX_COMMENT_LINE + 1 bytes.  The terminating '\n' or '\0'
+ * is consumed but not copied and at most MAX_COMMENT_LINE characters are
+ * copied; *pos is advanced past everything consumed.
+ * Returns the number of characters copied: 0 when *pos >= len and also
+ * for an empty line.
+ */
+static int _copy_one_line(const char *comment, char *line, int *pos, int len)
+{
+	int copied = 0;
+	char c;
+
+	if (*pos >= len)
+		return 0;
+
+	memset(line, 0, MAX_COMMENT_LINE+1);
+
+	while (copied < MAX_COMMENT_LINE) {
+		c = comment[(*pos)++];
+		if (c == '\n' || c == '\0')
+			break;
+		line[copied++] = c;
+	}
+
+	return copied;
+}
+
+/* Format 'version_enc' (top 3 bits . next 4 bits . low 9 bits) into 'version'; at most 9 bytes are written. */
+static int _get_config_node_version(uint16_t version_enc, char *version)
+{
+	const unsigned ver_major = (version_enc & 0xE000) >> 13;
+	const unsigned ver_minor = (version_enc & 0x1E00) >> 9;
+	const unsigned ver_patch = version_enc & 0x1FF;
+
+	if (dm_snprintf(version, 9, "%u.%u.%u", ver_major, ver_minor, ver_patch) != -1)
+		return 1;
+	log_error("_get_config_node_version: couldn't create version string");
+	return 0;
+}
+
+/* 1 if 'def' has a deprecation version set and spec->version is not below it, else 0. */
+static int _def_node_is_deprecated(cfg_def_item_t *def, struct config_def_tree_spec *spec)
+{
+	return def->deprecated_since_version && !(spec->version < def->deprecated_since_version);
+}
+
 static int _out_prefix_fn(const struct dm_config_node *cn, const char *line, void *baton)
 {
 	struct out_baton *out = baton;
@@ -1433,15 +1665,13 @@ static int _out_prefix_fn(const struct dm_config_node *cn, const char *line, voi
 	char version[9]; /* 8+1 chars for max version of 7.15.511 */
 	const char *node_type_name = cn->v ? "option" : "section";
 	char path[CFG_PATH_MAX_LEN];
+	char commentline[MAX_COMMENT_LINE+1];

-
-	if (cn->id < 0)
+	if (cn->id <= 0)
 		return 1;

-	if (!cn->id) {
-		log_error(INTERNAL_ERROR "Configuration node %s has invalid id.", cn->key);
-		return 0;
-	}
+	if (out->tree_spec->type == CFG_DEF_TREE_LIST)
+		return 1;

 	if ((out->tree_spec->type == CFG_DEF_TREE_DIFF) &&
 	    (!(out->tree_spec->check_status[cn->id] & CFG_DIFF)))
@@ -1449,12 +1679,27 @@ static int _out_prefix_fn(const struct dm_config_node *cn, const char *line, voi

 	cfg_def = cfg_def_get_item_p(cn->id);

-	if (out->tree_spec->withcomments) {
+	if (out->tree_spec->withsummary || out->tree_spec->withcomments) {
 		_cfg_def_make_path(path, sizeof(path), cfg_def->id, cfg_def, 1);
+		fprintf(out->fp, "\n");
 		fprintf(out->fp, "%s# Configuration %s %s.\n", line, node_type_name, path);

-		if (cfg_def->comment)
-			fprintf(out->fp, "%s# %s\n", line, cfg_def->comment);
+		if (out->tree_spec->withcomments &&
+		    _def_node_is_deprecated(cfg_def, out->tree_spec))
+			fprintf(out->fp, "%s# %s", line, cfg_def->deprecation_comment);
+
+		if (cfg_def->comment) {
+			int pos = 0;
+			while (_copy_one_line(cfg_def->comment, commentline, &pos, strlen(cfg_def->comment))) {
+				fprintf(out->fp, "%s# %s\n", line, commentline);
+				/* withsummary prints only the first comment line. */
+				if (!out->tree_spec->withcomments)
+					break;
+			}
+		}
+
+		if (_def_node_is_deprecated(cfg_def, out->tree_spec))
+			fprintf(out->fp, "%s# This configuration %s is deprecated.\n", line, node_type_name);

 		if (cfg_def->flags & CFG_ADVANCED)
 			fprintf(out->fp, "%s# This configuration %s is advanced.\n", line, node_type_name);
@@ -1467,34 +1712,98 @@ static int _out_prefix_fn(const struct dm_config_node *cn, const char *line, voi

 		if (cfg_def->flags & CFG_DEFAULT_UNDEFINED)
 			fprintf(out->fp, "%s# This configuration %s does not have a default value defined.\n", line, node_type_name);
+
+		if ((out->tree_spec->type == CFG_DEF_TREE_FULL) &&
+		    (out->tree_spec->check_status[cn->id] & CFG_USED))
+			fprintf(out->fp, "%s# Value defined in existing configuration has been used for this setting.\n", line);
 	}

 	if (out->tree_spec->withversions) {
-		if (dm_snprintf(version, 9, "%u.%u.%u",
-				(cfg_def->since_version & 0xE000) >> 13,
-				(cfg_def->since_version & 0x1E00) >> 9,
-				(cfg_def->since_version & 0x1FF)) == -1) {
-			log_error("_out_prefix_fn: couldn't create version string");
-			return 0;
+		if (!_get_config_node_version(cfg_def->since_version, version))
+			return_0;
+		fprintf(out->fp, "%s# Available since version %s.\n", line, version);
+
+		if (_def_node_is_deprecated(cfg_def, out->tree_spec)) {
+			if (!_get_config_node_version(cfg_def->deprecated_since_version, version))
+				return_0;
+			fprintf(out->fp, "%s# Deprecated since version %s.\n", line, version);
 		}
-		fprintf(out->fp, "%s# Since version %s.\n", line, version);
 	}

 	return 1;
 }

+/* Settings with a defined default always print; others only when marked CFG_USED in check_status. */
+static int _should_print_cfg_with_undef_def_val(struct out_baton *out, cfg_def_item_t *cfg_def,
+						const struct dm_config_node *cn)
+{
+	if (cfg_def->flags & CFG_DEFAULT_UNDEFINED)
+		/* undefined default: print only if the value is set in some config, i.e. used */
+		return out->tree_spec->check_status && (out->tree_spec->check_status[cn->id] & CFG_USED);
+	return 1;
+}
+
 static int _out_line_fn(const struct dm_config_node *cn, const char *line, void *baton)
 {
 	struct out_baton *out = baton;
-	struct cfg_def_item *cfg_def = cfg_def_get_item_p(cn->id);
+	struct cfg_def_item *cfg_def;
+	char config_path[CFG_PATH_MAX_LEN];
+	char summary[MAX_COMMENT_LINE+1];
+	char version[9];
+	int pos = 0;
+	size_t len;
+	char *space_prefix;

 	if ((out->tree_spec->type == CFG_DEF_TREE_DIFF) &&
 	    (!(out->tree_spec->check_status[cn->id] & CFG_DIFF)))
 		return 1;

-	fprintf(out->fp, "%s%s\n", (out->tree_spec->type != CFG_DEF_TREE_CURRENT) &&
-				   (out->tree_spec->type != CFG_DEF_TREE_DIFF) &&
-				   (cfg_def->flags & CFG_DEFAULT_UNDEFINED) ? "#" : "", line);
+	cfg_def = cfg_def_get_item_p(cn->id);
+
+	if (out->tree_spec->type == CFG_DEF_TREE_LIST) {
+		/* List view with node paths and summary. */
+		if (cfg_def->type & CFG_TYPE_SECTION)
+			return 1;
+		if (!_cfg_def_make_path(config_path, CFG_PATH_MAX_LEN, cfg_def->id, cfg_def, 1))
+			return_0;
+		if (out->tree_spec->withversions && !_get_config_node_version(cfg_def->since_version, version))
+			return_0;
+
+		summary[0] = '\0';
+		if (out->tree_spec->withsummary && cfg_def->comment)
+			_copy_one_line(cfg_def->comment, summary, &pos, strlen(cfg_def->comment));
+
+		fprintf(out->fp, "%s%s%s%s%s%s%s\n", config_path,
+			*summary || out->tree_spec->withversions ? " - ": "",
+			*summary ? summary : "",
+			*summary ? " " : "",
+			out->tree_spec->withversions ? "[" : "",
+			out->tree_spec->withversions ? version : "",
+			out->tree_spec->withversions ? "]" : "");
+
+		return 1;
+	}
+
+	/* Usual tree view with nodes and their values. */
+
+	if ((out->tree_spec->type != CFG_DEF_TREE_CURRENT) &&
+	    (out->tree_spec->type != CFG_DEF_TREE_DIFF) &&
+	    (out->tree_spec->type != CFG_DEF_TREE_FULL) &&
+	    (cfg_def->flags & (CFG_DEFAULT_UNDEFINED | CFG_DEFAULT_COMMENTED))) {
+		/* print with # at the front to comment out the line */
+		if (_should_print_cfg_with_undef_def_val(out, cfg_def, cn)) {
+			space_prefix = ((len = strspn(line, "\t "))) ? dm_pool_strndup(out->mem, line, len) : NULL;
+			fprintf(out->fp, "%s%s%s\n", space_prefix ? : "", "# ", line + len);
+			if (space_prefix)
+				dm_pool_free(out->mem, space_prefix);
+		}
+		return 1;
+	}
+
+	/* print the line as it is */
+	if (_should_print_cfg_with_undef_def_val(out, cfg_def, cn))
+		fprintf(out->fp, "%s\n", line);
+
 	return 1;
 }

@@ -1562,20 +1871,31 @@ static struct dm_config_node *_add_def_node(struct dm_config_tree *cft,
 {
 	struct dm_config_node *cn;
 	const char *str;
+	uint32_t format_flags = 0;

 	if (!(cn = dm_config_create_node(cft, def->name))) {
 		log_error("Failed to create default config setting node.");
 		return NULL;
 	}

-	if (!(def->type & CFG_TYPE_SECTION) && (!(cn->v = dm_config_create_value(cft)))) {
-		log_error("Failed to create default config setting node value.");
-		return NULL;
+	if (!(def->type & CFG_TYPE_SECTION) && !(def->type & CFG_TYPE_ARRAY)) {
+		if (!(cn->v = dm_config_create_value(cft))) {
+			log_error("Failed to create default config setting node value.");
+			return NULL;
+		}
+		if (spec->withspaces)
+			format_flags |= DM_CONFIG_VALUE_FMT_COMMON_EXTRA_SPACES;
 	}

 	cn->id = def->id;

-	if (!(def->type & CFG_TYPE_ARRAY)) {
+	if (spec->unconfigured && def->default_unconfigured_value.v_UNCONFIGURED) {
+		cn->v->type = DM_CFG_STRING;
+		cn->v->v.str = cfg_def_get_default_unconfigured_value_hint(spec->cmd, def);
+		if (def->type != CFG_TYPE_STRING)
+			format_flags |= DM_CONFIG_VALUE_FMT_STRING_NO_QUOTES;
+		dm_config_value_set_format_flags(cn->v, format_flags);
+	} else if (!(def->type & CFG_TYPE_ARRAY)) {
 		switch (def->type) {
 			case CFG_TYPE_SECTION:
 				cn->v = NULL;
@@ -1587,6 +1907,8 @@ static struct dm_config_node *_add_def_node(struct dm_config_tree *cft,
 			case CFG_TYPE_INT:
 				cn->v->type = DM_CFG_INT;
 				cn->v->v.i = cfg_def_get_default_value_hint(spec->cmd, def, CFG_TYPE_INT, NULL);
+				if (def->flags & CFG_FORMAT_INT_OCTAL)
+					format_flags |= DM_CONFIG_VALUE_FMT_INT_OCTAL;
 				break;
 			case CFG_TYPE_FLOAT:
 				cn->v->type = DM_CFG_FLOAT;
@@ -1603,8 +1925,13 @@ static struct dm_config_node *_add_def_node(struct dm_config_tree *cft,
 				return NULL;
 				break;
 		}
-	} else
-		cn->v = _get_def_array_values(cft, def);
+		dm_config_value_set_format_flags(cn->v, format_flags);
+	} else {
+		if (spec->withspaces)
+			format_flags |= DM_CONFIG_VALUE_FMT_COMMON_EXTRA_SPACES;
+		format_flags |= DM_CONFIG_VALUE_FMT_COMMON_ARRAY;
+		cn->v = _get_def_array_values(spec->cmd, cft, def, format_flags);
+	}

 	cn->child = NULL;
 	if (parent) {
@@ -1620,6 +1947,11 @@ static struct dm_config_node *_add_def_node(struct dm_config_tree *cft,
 	return cn;
 }

+static int _should_skip_deprecated_def_node(cfg_def_item_t *def, struct config_def_tree_spec *spec)
+{
+	return spec->ignoredeprecated ? _def_node_is_deprecated(def, spec) : 0;	/* skip only on request */
+}
+
 static int _should_skip_def_node(struct config_def_tree_spec *spec, int section_id, int id)
 {
 	cfg_def_item_t *def = cfg_def_get_item_p(id);
@@ -1631,6 +1963,8 @@ static int _should_skip_def_node(struct config_def_tree_spec *spec, int section_
 		return 1;

 	switch (spec->type) {
+		case CFG_DEF_TREE_FULL:
+			/* fall through */
 		case CFG_DEF_TREE_MISSING:
 			if (!spec->check_status) {
 				log_error_once(INTERNAL_ERROR "couldn't determine missing "
@@ -1638,19 +1972,27 @@ static int _should_skip_def_node(struct config_def_tree_spec *spec, int section_
 				return 1;
 			}
 			if ((spec->check_status[id] & CFG_USED) ||
-			    (def->flags & CFG_NAME_VARIABLE) ||
-			    (def->since_version > spec->version))
+			    (def->flags & CFG_NAME_VARIABLE))
+				return 1;
+
+			if ((spec->type == CFG_DEF_TREE_MISSING) &&
+			    ((def->since_version > spec->version) ||
+			     _should_skip_deprecated_def_node(def, spec)))
 				return 1;
 			break;
 		case CFG_DEF_TREE_NEW:
-			if (def->since_version != spec->version)
+			if ((def->since_version != spec->version) ||
+			    _should_skip_deprecated_def_node(def, spec))
 				return 1;
 			break;
 		case CFG_DEF_TREE_PROFILABLE:
+			/* fall through */
 		case CFG_DEF_TREE_PROFILABLE_CMD:
+			/* fall through */
 		case CFG_DEF_TREE_PROFILABLE_MDA:
 			if (!(def->flags & CFG_PROFILABLE) ||
-			    (def->since_version > spec->version))
+			    (def->since_version > spec->version) ||
+			    _should_skip_deprecated_def_node(def, spec))
 				return 1;
 			flags = def->flags & ~CFG_PROFILABLE;
 			if (spec->type == CFG_DEF_TREE_PROFILABLE_CMD) {
@@ -1662,7 +2004,8 @@ static int _should_skip_def_node(struct config_def_tree_spec *spec, int section_
 			}
 			break;
 		default:
-			if (def->since_version > spec->version)
+			if ((def->since_version > spec->version) ||
+			    _should_skip_deprecated_def_node(def, spec))
 				return 1;
 			break;
 	}
@@ -1701,7 +2044,7 @@ bad:

 struct dm_config_tree *config_def_create_tree(struct config_def_tree_spec *spec)
 {
-	struct dm_config_tree *cft;
+	struct dm_config_tree *cft = NULL, *tmp_cft = NULL;
 	struct dm_config_node *root = NULL, *relay = NULL, *tmp;
 	int id;

@@ -1714,6 +2057,9 @@ struct dm_config_tree *config_def_create_tree(struct config_def_tree_spec *spec)
 		if (cfg_def_get_item_p(id)->parent != root_CFG_SECTION)
 			continue;

+		if (spec->ignorelocal && (id == local_CFG_SECTION))
+			continue;
+
 		if ((tmp = _add_def_section_subtree(cft, spec, root, relay, id))) {
 			relay = tmp;
 			if (!root)
@@ -1722,7 +2068,33 @@ struct dm_config_tree *config_def_create_tree(struct config_def_tree_spec *spec)
 	}

 	cft->root = root;
+
+	if (spec->type == CFG_DEF_TREE_FULL) {
+		if (!(tmp_cft = dm_config_create())) {
+			log_error("Failed to create temporary config tree while creating full tree.");
+			goto bad;
+		}
+
+		if (!(tmp_cft->root = dm_config_clone_node_with_mem(cft->mem, spec->current_cft->root, 1))) {
+			log_error("Failed to clone current config tree.");
+			goto bad;
+		}
+
+		if (!merge_config_tree(spec->cmd, cft, tmp_cft, CONFIG_MERGE_TYPE_RAW)) {
+			log_error("Failed to merge default and current config tree.");
+			goto bad;
+		}
+
+		dm_config_destroy(tmp_cft);
+	}
+
 	return cft;
+bad:
+	if (cft)
+		dm_config_destroy(cft);
+	if (tmp_cft)
+		dm_config_destroy(tmp_cft);
+	return NULL;
 }

 static int _check_profile(struct cmd_context *cmd, struct profile *profile)
@@ -1901,6 +2273,11 @@ const char *get_default_devices_cache_dir_CFG(struct cmd_context *cmd, struct pr
 	return dm_pool_strdup(cmd->mem, buf);
 }

+const char *get_default_unconfigured_devices_cache_dir_CFG(struct cmd_context *cmd)
+{
+	return "@DEFAULT_SYS_DIR@/@DEFAULT_CACHE_SUBDIR@";
+}
+
 const char *get_default_devices_cache_CFG(struct cmd_context *cmd, struct profile *profile)
 {
 	const char *cache_dir = NULL, *cache_file_prefix = NULL;
@@ -1935,6 +2312,24 @@ const char *get_default_devices_cache_CFG(struct cmd_context *cmd, struct profil
 	return dm_pool_strdup(cmd->mem, buf);
 }

+const char *get_default_unconfigured_devices_cache_CFG(struct cmd_context *cmd)
+{
+	const char *cache_file_prefix = NULL;
+	static char buf[PATH_MAX];
+
+	if (find_config_tree_node(cmd, devices_cache_file_prefix_CFG, NULL))
+		cache_file_prefix = find_config_tree_str_allow_empty(cmd, devices_cache_file_prefix_CFG, NULL);
+
+	if (dm_snprintf(buf, sizeof(buf), "%s/%s.cache",
+			get_default_unconfigured_devices_cache_dir_CFG(cmd),
+			cache_file_prefix ? : DEFAULT_CACHE_FILE_PREFIX) < 0) {
+		log_error("Persistent cache filename too long.");
+		return NULL;
+	}
+
+	return dm_pool_strdup(cmd->mem, buf);
+}
+
 const char *get_default_backup_backup_dir_CFG(struct cmd_context *cmd, struct profile *profile)
 {
 	static char buf[PATH_MAX];
@@ -1948,6 +2343,11 @@ const char *get_default_backup_backup_dir_CFG(struct cmd_context *cmd, struct pr
 	return dm_pool_strdup(cmd->mem, buf);
 }

+const char *get_default_unconfigured_backup_backup_dir_CFG(struct cmd_context *cmd)
+{
+	return "@DEFAULT_SYS_DIR@/@DEFAULT_BACKUP_SUBDIR@";
+}
+
 const char *get_default_backup_archive_dir_CFG(struct cmd_context *cmd, struct profile *profile)
 {
 	static char buf[PATH_MAX];
@@ -1961,6 +2361,11 @@ const char *get_default_backup_archive_dir_CFG(struct cmd_context *cmd, struct p
 	return dm_pool_strdup(cmd->mem, buf);
 }

+const char *get_default_unconfigured_backup_archive_dir_CFG(struct cmd_context *cmd)
+{
+	return "@DEFAULT_SYS_DIR@/@DEFAULT_ARCHIVE_SUBDIR@";
+}
+
 const char *get_default_config_profile_dir_CFG(struct cmd_context *cmd, struct profile *profile)
 {
 	static char buf[PATH_MAX];
@@ -1974,6 +2379,11 @@ const char *get_default_config_profile_dir_CFG(struct cmd_context *cmd, struct p
 	return dm_pool_strdup(cmd->mem, buf);
 }

+const char *get_default_unconfigured_config_profile_dir_CFG(struct cmd_context *cmd)
+{
+	return "@DEFAULT_SYS_DIR@/@DEFAULT_PROFILE_SUBDIR@";
+}
+
 const char *get_default_activation_mirror_image_fault_policy_CFG(struct cmd_context *cmd, struct profile *profile)
 {
 	return find_config_tree_str(cmd, activation_mirror_device_fault_policy_CFG, profile);
--- a/lib/config/config.h
+++ b/lib/config/config.h
@@ -72,6 +72,7 @@ typedef int (*t_fn_CFG_TYPE_INT) (struct cmd_context *cmd, struct profile *profi
 typedef float (*t_fn_CFG_TYPE_FLOAT) (struct cmd_context *cmd, struct profile *profile);
 typedef const char* (*t_fn_CFG_TYPE_STRING) (struct cmd_context *cmd, struct profile *profile);
 typedef const char* (*t_fn_CFG_TYPE_ARRAY) (struct cmd_context *cmd, struct profile *profile);
+typedef const char* (*t_fn_UNCONFIGURED) (struct cmd_context *cmd);

 /* configuration definition item value (for item's default value) */
 typedef union {
@@ -88,62 +89,86 @@ typedef union {
 	t_fn_CFG_TYPE_ARRAY fn_CFG_TYPE_ARRAY;
 } cfg_def_value_t;

+typedef union {
+	const char *v_UNCONFIGURED;
+	t_fn_UNCONFIGURED fn_UNCONFIGURED;
+} cfg_def_unconfigured_value_t;
+
 /* configuration definition item flags: */

+
 /* whether the configuration item name is variable */
-#define CFG_NAME_VARIABLE	0x01
+#define CFG_NAME_VARIABLE	0x001
 /* whether empty value is allowed */
-#define CFG_ALLOW_EMPTY		0x02
+#define CFG_ALLOW_EMPTY		0x002
 /* whether the configuration item is for advanced use only */
-#define CFG_ADVANCED		0x04
+#define CFG_ADVANCED		0x004
 /* whether the configuration item is not officially supported */
-#define CFG_UNSUPPORTED		0x08
+#define CFG_UNSUPPORTED		0x008
 /* whether the configuration item is customizable by a profile */
-#define CFG_PROFILABLE		0x10
+#define CFG_PROFILABLE		0x010
 /* whether the configuration item is customizable by a profile */
 /* and whether it can be attached to VG/LV metadata at the same time
 * The CFG_PROFILABLE_METADATA flag incorporates CFG_PROFILABLE flag!!! */
-#define CFG_PROFILABLE_METADATA 0x30
+#define CFG_PROFILABLE_METADATA 0x030
 /* whether the default value is undefned */
-#define CFG_DEFAULT_UNDEFINED	0x40
-/* whether the defualt value is calculated during run time */
-#define CFG_DEFAULT_RUN_TIME	0x80
+#define CFG_DEFAULT_UNDEFINED	0x040
+/* whether the default value is commented out on output */
+#define CFG_DEFAULT_COMMENTED	0x080
+/* whether the default value is calculated during run time */
+#define CFG_DEFAULT_RUN_TIME	0x100
+/* whether the configuration setting is disabled (and hence defaults always used) */
+#define CFG_DISABLED		0x200
+/* whether to print integers in octal form (prefixed by "0") */
+#define CFG_FORMAT_INT_OCTAL	0x400
+/* whether to disable checks for the whole config section subtree */
+#define CFG_SECTION_NO_CHECK	0x800

 /* configuration definition item structure */
 typedef struct cfg_def_item {
-	int id;				/* ID of this item */
-	int parent;			/* ID of parent item */
-	const char *name;		/* name of the item in configuration tree */
-	int type;			/* configuration item type (bits of cfg_def_type_t) */
-	cfg_def_value_t default_value;	/* default value (only for settings) */
-	uint16_t flags;			/* configuration item definition flags */
-	uint16_t since_version;		/* version this item appeared in */
-	const char *comment;		/* brief comment */
+	int id;								/* ID of this item */
+	int parent;							/* ID of parent item */
+	const char *name;						/* name of the item in configuration tree */
+	int type;							/* configuration item type (bits of cfg_def_type_t) */
+	cfg_def_value_t default_value;					/* default value (only for settings) */
+	uint16_t flags;							/* configuration item definition flags */
+	uint16_t since_version;						/* version this item appeared in */
+	cfg_def_unconfigured_value_t default_unconfigured_value;	/* default value in terms of @FOO@, pre-configured (only for settings) */
+	uint16_t deprecated_since_version;				/* version since this item is deprecated */
+	const char *deprecation_comment;				/* comment about reasons for deprecation and settings that supersede this one */
+	const char *comment;						/* comment */
 } cfg_def_item_t;

 /* configuration definition tree types */
 typedef enum {
 	CFG_DEF_TREE_CURRENT,		/* tree of nodes with values currently set in the config */
 	CFG_DEF_TREE_MISSING,		/* tree of nodes missing in current config using default values */
-	CFG_DEF_TREE_COMPLETE,		/* CURRENT + MISSING, the tree actually used within execution, not implemented yet */
+	CFG_DEF_TREE_FULL,		/* CURRENT + MISSING, the tree actually used within execution */
 	CFG_DEF_TREE_DEFAULT,		/* tree of all possible config nodes with default values */
 	CFG_DEF_TREE_NEW,		/* tree of all new nodes that appeared in given version */
 	CFG_DEF_TREE_PROFILABLE,	/* tree of all nodes that are customizable by profiles */
 	CFG_DEF_TREE_PROFILABLE_CMD,	/* tree of all nodes that are customizable by command profiles (subset of PROFILABLE) */
 	CFG_DEF_TREE_PROFILABLE_MDA,	/* tree of all nodes that are customizable by metadata profiles (subset of PROFILABLE) */
 	CFG_DEF_TREE_DIFF,		/* tree of all nodes that differ from defaults */
+	CFG_DEF_TREE_LIST,		/* list all nodes */
 } cfg_def_tree_t;

 /* configuration definition tree specification */
 struct config_def_tree_spec {
-	struct cmd_context *cmd;	/* command context (for run-time defaults */
-	cfg_def_tree_t type;		/* tree type */
-	uint16_t version;		/* tree at this LVM2 version */
+	struct cmd_context *cmd;		/* command context (for run-time defaults) */
+	struct dm_config_tree *current_cft;	/* current config tree which is defined explicitly - defaults are not used */
+	cfg_def_tree_t type;			/* tree type */
+	uint16_t version;			/* tree at this LVM2 version */
 	unsigned ignoreadvanced:1;		/* do not include advanced configs */
-	unsigned ignoreunsupported:1;	/* do not include unsupported configs */
-	unsigned withcomments:1;		/* include comments */
+	unsigned ignoreunsupported:1;		/* do not include unsupported configs */
+	unsigned ignoredeprecated:1;		/* do not include deprecated configs */
+	unsigned ignorelocal:1;			/* do not include the local section */
+	unsigned withsummary:1;			/* include first line of comments - a summary */
+	unsigned withcomments:1;		/* include all comment lines */
 	unsigned withversions:1;		/* include versions */
-	uint8_t *check_status;		/* status of last tree check (currently needed for CFG_DEF_TREE_MISSING only) */
+	unsigned withspaces:1;			/* add more spaces in output for better readability */
+	unsigned unconfigured:1;		/* use unconfigured path strings */
+	uint8_t *check_status;			/* status of last tree check (currently needed for CFG_DEF_TREE_MISSING only) */
 };


@@ -158,11 +183,11 @@ struct config_def_tree_spec {
 * Register ID for each possible item in the configuration tree.
 */
 enum {
-#define cfg_section(id, name, parent, flags, since_version, comment) id,
-#define cfg(id, name, parent, flags, type, default_value, since_version, comment) id,
-#define cfg_runtime(id, name, parent, flags, type, since_version, comment) id,
-#define cfg_array(id, name, parent, flags, types, default_value, since_version, comment) id,
-#define cfg_array_runtime(id, name, parent, flags, types, since_version, comment) id,
+#define cfg_section(id, name, parent, flags, since_version, deprecated_since_version, deprecation_comment, comment) id,
+#define cfg(id, name, parent, flags, type, default_value, since_version, unconfigured_value, deprecated_since_version, deprecation_comment, comment) id,
+#define cfg_runtime(id, name, parent, flags, type, since_version, deprecated_since_version, deprecation_comment, comment) id,
+#define cfg_array(id, name, parent, flags, types, default_value, since_version, unconfigured_value, deprecated_since_version, deprecation_comment, comment) id,
+#define cfg_array_runtime(id, name, parent, flags, types, since_version, deprecated_since_version, deprecation_comment, comment) id,
 #include "config_settings.h"
 #undef cfg_section
 #undef cfg
@@ -184,6 +209,8 @@ struct cft_check_handle {
 	unsigned skip_if_checked:1;	/* skip the check if already done before - return last state */
 	unsigned suppress_messages:1;	/* suppress messages during the check if config item is found invalid */
 	unsigned check_diff:1;		/* check if the value used differs from default one */
+	unsigned ignoreadvanced:1;	/* do not include advanced configs */
+	unsigned ignoreunsupported:1;	/* do not include unsupported configs */
 	uint8_t status[CFG_COUNT];	/* flags for each configuration item - the result of the check */
 };

@@ -202,7 +229,8 @@ typedef uint32_t (*checksum_fn_t) (uint32_t initial, const uint8_t *buf, uint32_
 struct dm_config_tree *config_open(config_source_t source, const char *filename, int keep_open);
 int config_file_read_fd(struct dm_config_tree *cft, struct device *dev,
 			off_t offset, size_t size, off_t offset2, size_t size2,
-			checksum_fn_t checksum_fn, uint32_t checksum);
+			checksum_fn_t checksum_fn, uint32_t checksum,
+			int skip_parse);
 int config_file_read(struct dm_config_tree *cft);
 struct dm_config_tree *config_file_open_and_read(const char *config_file, config_source_t source,
 						 struct cmd_context *cmd);
@@ -211,7 +239,7 @@ int config_write(struct dm_config_tree *cft, struct config_def_tree_spec *tree_s
 struct dm_config_tree *config_def_create_tree(struct config_def_tree_spec *spec);
 void config_destroy(struct dm_config_tree *cft);

-time_t config_file_timestamp(struct dm_config_tree *cft);
+struct timespec config_file_timestamp(struct dm_config_tree *cft);
 int config_file_changed(struct dm_config_tree *cft);
 int config_file_check(struct dm_config_tree *cft, const char **filename, struct stat *info);

@@ -230,6 +258,12 @@ typedef enum {
 int merge_config_tree(struct cmd_context *cmd, struct dm_config_tree *cft,
 		      struct dm_config_tree *newdata, config_merge_t);

+/*
+ * The next two do not check config overrides and must only be used for the tags section.
+ */
+const struct dm_config_node *find_config_node(struct cmd_context *cmd, struct dm_config_tree *cft, int id);
+int find_config_bool(struct cmd_context *cmd, struct dm_config_tree *cft, int id);
+
 /*
 * These versions check an override tree, if present, first.
 */
@@ -240,18 +274,27 @@ int find_config_tree_int(struct cmd_context *cmd, int id, struct profile *profil
 int64_t find_config_tree_int64(struct cmd_context *cmd, int id, struct profile *profile);
 float find_config_tree_float(struct cmd_context *cmd, int id, struct profile *profile);
 int find_config_tree_bool(struct cmd_context *cmd, int id, struct profile *profile);
+const struct dm_config_node *find_config_tree_array(struct cmd_context *cmd, int id, struct profile *profile);

 /*
 * Functions for configuration settings for which the default
 * value is evaluated at runtime based on command context.
 */
 const char *get_default_devices_cache_dir_CFG(struct cmd_context *cmd, struct profile *profile);
+const char *get_default_unconfigured_devices_cache_dir_CFG(struct cmd_context *cmd);
 const char *get_default_devices_cache_CFG(struct cmd_context *cmd, struct profile *profile);
+const char *get_default_unconfigured_devices_cache_CFG(struct cmd_context *cmd);
 const char *get_default_backup_backup_dir_CFG(struct cmd_context *cmd, struct profile *profile);
+const char *get_default_unconfigured_backup_backup_dir_CFG(struct cmd_context *cmd);
 const char *get_default_backup_archive_dir_CFG(struct cmd_context *cmd, struct profile *profile);
+const char *get_default_unconfigured_backup_archive_dir_CFG(struct cmd_context *cmd);
 const char *get_default_config_profile_dir_CFG(struct cmd_context *cmd, struct profile *profile);
+const char *get_default_unconfigured_config_profile_dir_CFG(struct cmd_context *cmd);
 const char *get_default_activation_mirror_image_fault_policy_CFG(struct cmd_context *cmd, struct profile *profile);
+#define get_default_unconfigured_activation_mirror_image_fault_policy_CFG NULL
 int get_default_allocation_thin_pool_chunk_size_CFG(struct cmd_context *cmd, struct profile *profile);
+#define get_default_unconfigured_allocation_thin_pool_chunk_size_CFG NULL
 int get_default_allocation_cache_pool_chunk_size_CFG(struct cmd_context *cmd, struct profile *profile);
+#define get_default_unconfigured_allocation_cache_pool_chunk_size_CFG NULL

 #endif
--- a/lib/config/config_settings.h
+++ b/lib/config/config_settings.h
--- a/lib/config/defaults.h
+++ b/lib/config/defaults.h
@@ -1,6 +1,6 @@
 /*
 * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
- * Copyright (C) 2004-2012 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2004-2014 Red Hat, Inc. All rights reserved.
 *
 * This file is part of LVM2.
 *
@@ -29,9 +29,12 @@

 #define DEFAULT_DEV_DIR "/dev"
 #define DEFAULT_PROC_DIR "/proc"
+#define DEFAULT_SYSTEM_ID_SOURCE "none"
 #define DEFAULT_OBTAIN_DEVICE_LIST_FROM_UDEV 1
+#define DEFAULT_EXTERNAL_DEVICE_INFO_SOURCE "none"
 #define DEFAULT_SYSFS_SCAN 1
 #define DEFAULT_MD_COMPONENT_DETECTION 1
+#define DEFAULT_FW_RAID_COMPONENT_DETECTION 0
 #define DEFAULT_MD_CHUNK_ALIGNMENT 1
 #define DEFAULT_IGNORE_LVM_MIRRORS 1
 #define DEFAULT_MULTIPATH_COMPONENT_DETECTION 1
@@ -44,21 +47,24 @@
 #define DEFAULT_PV_MIN_SIZE_KB 2048

 #define DEFAULT_LOCKING_LIB "liblvm2clusterlock.so"
+#define DEFAULT_ERROR_WHEN_FULL 0
 #define DEFAULT_FALLBACK_TO_LOCAL_LOCKING 1
 #define DEFAULT_FALLBACK_TO_CLUSTERED_LOCKING 1
 #define DEFAULT_WAIT_FOR_LOCKS 1
+#define DEFAULT_LVMLOCKD_LOCK_RETRIES 3
 #define DEFAULT_PRIORITISE_WRITE_LOCKS 1
 #define DEFAULT_USE_MLOCKALL 0
 #define DEFAULT_METADATA_READ_ONLY 0
 #define DEFAULT_LVDISPLAY_SHOWS_FULL_DEVICE_PATH 0

-#define DEFAULT_MIRROR_SEGTYPE "raid1"
-#define DEFAULT_MIRRORLOG "disk"
+#define DEFAULT_SANLOCK_LV_EXTEND_MB 256
+
+#define DEFAULT_MIRRORLOG MIRROR_LOG_DISK
 #define DEFAULT_MIRROR_LOG_FAULT_POLICY "allocate"
 #define DEFAULT_MIRROR_IMAGE_FAULT_POLICY "remove"
 #define DEFAULT_MIRROR_MAX_IMAGES 8 /* limited by kernel DM_KCOPYD_MAX_REGIONS */
-#define DEFAULT_RAID10_SEGTYPE "raid10"
 #define DEFAULT_RAID_FAULT_POLICY "warn"
+
 #define DEFAULT_DMEVENTD_RAID_LIB "libdevmapper-event-lvm2raid.so"
 #define DEFAULT_DMEVENTD_MIRROR_LIB "libdevmapper-event-lvm2mirror.so"
 #define DEFAULT_DMEVENTD_SNAPSHOT_LIB "libdevmapper-event-lvm2snapshot.so"
@@ -73,12 +79,17 @@
 #endif

 #ifdef THIN_CHECK_NEEDS_CHECK
-#  define DEFAULT_THIN_CHECK_OPTIONS "-q --clear-needs-check-flag"
+#  define DEFAULT_THIN_CHECK_OPTION1 "-q"
+#  define DEFAULT_THIN_CHECK_OPTION2 "--clear-needs-check-flag"
+#  define DEFAULT_THIN_CHECK_OPTIONS_CONFIG "#S" DEFAULT_THIN_CHECK_OPTION1 "#S" DEFAULT_THIN_CHECK_OPTION2
 #else
-#  define DEFAULT_THIN_CHECK_OPTIONS "-q"
+#  define DEFAULT_THIN_CHECK_OPTION1 "-q"
+#  define DEFAULT_THIN_CHECK_OPTION2 ""
+#  define DEFAULT_THIN_CHECK_OPTIONS_CONFIG "#S" DEFAULT_THIN_CHECK_OPTION1
 #endif

-#define DEFAULT_THIN_REPAIR_OPTIONS ""
+#define DEFAULT_THIN_REPAIR_OPTION1 ""
+#define DEFAULT_THIN_REPAIR_OPTIONS_CONFIG "#S" DEFAULT_THIN_REPAIR_OPTION1
 #define DEFAULT_THIN_POOL_METADATA_REQUIRE_SEPARATE_PVS 0
 #define DEFAULT_THIN_POOL_MAX_METADATA_SIZE (16 * 1024 * 1024)  /* KB */
 #define DEFAULT_THIN_POOL_MIN_METADATA_SIZE 2048  /* KB */
@@ -90,21 +101,27 @@
 #define DEFAULT_THIN_POOL_ZERO 1
 #define DEFAULT_POOL_METADATA_SPARE 1 /* thin + cache */

-#define DEFAULT_CACHE_CHECK_OPTIONS "-q"
-#define DEFAULT_CACHE_REPAIR_OPTIONS ""
+#ifdef CACHE_CHECK_NEEDS_CHECK
+#  define DEFAULT_CACHE_CHECK_OPTION1 "-q"
+#  define DEFAULT_CACHE_CHECK_OPTION2 "--clear-needs-check-flag"
+#  define DEFAULT_CACHE_CHECK_OPTIONS_CONFIG "#S" DEFAULT_CACHE_CHECK_OPTION1 "#S" DEFAULT_CACHE_CHECK_OPTION2
+#else
+#  define DEFAULT_CACHE_CHECK_OPTION1 "-q"
+#  define DEFAULT_CACHE_CHECK_OPTION2 ""
+#  define DEFAULT_CACHE_CHECK_OPTIONS_CONFIG "#S" DEFAULT_CACHE_CHECK_OPTION1
+#endif
+
+#define DEFAULT_CACHE_REPAIR_OPTION1 ""
+#define DEFAULT_CACHE_REPAIR_OPTIONS_CONFIG "#S" DEFAULT_CACHE_REPAIR_OPTION1
 #define DEFAULT_CACHE_POOL_METADATA_REQUIRE_SEPARATE_PVS 0
 #define DEFAULT_CACHE_POOL_CHUNK_SIZE 64 /* KB */
 #define DEFAULT_CACHE_POOL_MIN_METADATA_SIZE 2048  /* KB */
 #define DEFAULT_CACHE_POOL_MAX_METADATA_SIZE (16 * 1024 * 1024)  /* KB */
+#define DEFAULT_CACHE_POOL_CACHEMODE "writethrough"
+#define DEFAULT_CACHE_POOL_POLICY "mq"

 #define DEFAULT_UMASK 0077

-#ifdef LVM1_FALLBACK
-#  define DEFAULT_FALLBACK_TO_LVM1 1
-#else
-#  define DEFAULT_FALLBACK_TO_LVM1 0
-#endif
-
 #define DEFAULT_FORMAT "lvm2"

 #define DEFAULT_STRIPESIZE 64	/* KB */
@@ -135,10 +152,6 @@
 #  define DEFAULT_LOG_FACILITY LOG_USER
 #endif

-#define DEFAULT_LOGGED_DEBUG_CLASSES (LOG_CLASS_MEM | LOG_CLASS_DEVS | \
-    LOG_CLASS_ACTIVATION | LOG_CLASS_ALLOC | LOG_CLASS_LVMETAD | \
-    LOG_CLASS_METADATA | LOG_CLASS_CACHE | LOG_CLASS_LOCKING)
-
 #define DEFAULT_SYSLOG 1
 #define DEFAULT_VERBOSE 0
 #define DEFAULT_SILENT 0
@@ -175,6 +188,7 @@

 #define DEFAULT_MAX_ERROR_COUNT	NO_DEV_ERROR_COUNT_LIMIT

+#define DEFAULT_REP_COMPACT_OUTPUT 0
 #define DEFAULT_REP_ALIGNED 1
 #define DEFAULT_REP_BUFFERED 1
 #define DEFAULT_REP_COLUMNS_AS_ROWS 0
@@ -183,6 +197,7 @@
 #define DEFAULT_REP_QUOTED 1
 #define DEFAULT_REP_SEPARATOR " "
 #define DEFAULT_REP_LIST_ITEM_SEPARATOR ","
+#define DEFAULT_TIME_FORMAT "%Y-%m-%d %T %z"

 #define DEFAULT_LVS_COLS "lv_name,vg_name,lv_attr,lv_size,pool_lv,origin,data_percent,metadata_percent,move_pv,mirror_log,copy_percent,convert_lv"
 #define DEFAULT_VGS_COLS "vg_name,pv_count,lv_count,snap_count,vg_attr,vg_size,vg_free"
@@ -212,4 +227,6 @@
 #define DEFAULT_THIN_POOL_AUTOEXTEND_THRESHOLD 100
 #define DEFAULT_THIN_POOL_AUTOEXTEND_PERCENT 20

+#define DEFAULT_CY_LOCK_TYPE "sanlock"
+
 #endif				/* _LVM_DEFAULTS_H */
--- a/lib/datastruct/str_list.c
+++ b/lib/datastruct/str_list.c
@@ -71,6 +71,21 @@ int str_list_add(struct dm_pool *mem, struct dm_list *sll, const char *str)
 	return str_list_add_no_dup_check(mem, sll, str);
 }

+/* Add contents of sll2 to sll; fails if sll2 is NULL or any str_list_add() call fails */
+int str_list_add_list(struct dm_pool *mem, struct dm_list *sll, struct dm_list *sll2)
+{
+	struct dm_str_list *sl;
+
+	if (!sll2)
+		return_0;
+
+	dm_list_iterate_items(sl, sll2)
+		if (!str_list_add(mem, sll, sl->str))
+			return_0;
+
+	return 1;
+}
+
 void str_list_del(struct dm_list *sll, const char *str)
 {
 	struct dm_list *slh, *slht;
--- a/lib/datastruct/str_list.h
+++ b/lib/datastruct/str_list.h
@@ -21,6 +21,7 @@ struct dm_pool;

 struct dm_list *str_list_create(struct dm_pool *mem);
 int str_list_add(struct dm_pool *mem, struct dm_list *sll, const char *str);
+int str_list_add_list(struct dm_pool *mem, struct dm_list *sll, struct dm_list *sll2);
 int str_list_add_no_dup_check(struct dm_pool *mem, struct dm_list *sll, const char *str);
 int str_list_add_h_no_dup_check(struct dm_pool *mem, struct dm_list *sll, const char *str);
 void str_list_del(struct dm_list *sll, const char *str);
--- a/lib/device/dev-cache.c
+++ b/lib/device/dev-cache.c
@@ -64,6 +64,9 @@ static void _dev_init(struct device *dev, int max_error_count)
 	dev->read_ahead = -1;
 	dev->max_error_count = max_error_count;

+	dev->ext.enabled = 0;
+	dev->ext.src = DEV_EXT_NONE;
+
 	dm_list_init(&dev->aliases);
 	dm_list_init(&dev->open_list);
 }
@@ -678,10 +681,12 @@ static int _init_preferred_names(struct cmd_context *cmd)

 	_cache.preferred_names_matcher = NULL;

-	if (!(cn = find_config_tree_node(cmd, devices_preferred_names_CFG, NULL)) ||
+	if (!(cn = find_config_tree_array(cmd, devices_preferred_names_CFG, NULL)) ||
 	    cn->v->type == DM_CFG_EMPTY_ARRAY) {
-		log_very_verbose("devices/preferred_names not found in config file: "
-				 "using built-in preferences");
+		log_very_verbose("devices/preferred_names %s: "
+				 "using built-in preferences",
+				 cn && cn->v->type == DM_CFG_EMPTY_ARRAY ? "is empty"
+									 : "not found in config");
 		return 1;
 	}

@@ -940,7 +945,7 @@ struct device *dev_cache_get(const char *name, struct dev_filter *f)
 		if (d)
 			dm_hash_remove(_cache.names, name);
 		log_sys_very_verbose("stat", name);
-		return NULL;
+		d = NULL;
 	}

 	if (d && (buf.st_rdev != d->dev)) {
@@ -983,12 +988,31 @@ static struct device *_dev_cache_seek_devt(dev_t dev)
 */
 struct device *dev_cache_get_by_devt(dev_t dev, struct dev_filter *f)
 {
+	char path[PATH_MAX];
+	const char *sysfs_dir;
+	struct stat info;
 	struct device *d = _dev_cache_seek_devt(dev);

 	if (d && (d->flags & DEV_REGULAR))
 		return d;

 	if (!d) {
+		sysfs_dir = dm_sysfs_dir();
+		if (sysfs_dir && *sysfs_dir) {
+			/* First check if dev is in sysfs to avoid useless scan */
+			if (dm_snprintf(path, sizeof(path), "%s/dev/block/%d:%d",
+					sysfs_dir, (int)MAJOR(dev), (int)MINOR(dev)) < 0) {
+				log_error("dm_snprintf partition failed.");
+				return NULL;
+			}
+
+			if (lstat(path, &info)) {
+				log_debug("No sysfs entry for %d:%d.",
+					  (int)MAJOR(dev), (int)MINOR(dev));
+				return NULL;
+			}
+		}
+
 		_full_scan(0);
 		d = _dev_cache_seek_devt(dev);
 	}
@@ -1009,9 +1033,11 @@ struct dev_iter *dev_iter_create(struct dev_filter *f, int dev_scan)
 	if (dev_scan && !trust_cache()) {
 		/* Flag gets reset between each command */
 		if (!full_scan_done()) {
-			if (f && f->wipe)
-				f->wipe(f); /* Calls _full_scan(1) */
-			else
+			if (f && f->wipe) {
+				f->wipe(f); /* might call _full_scan(1) */
+				if (!full_scan_done())
+					_full_scan(1);
+			} else
 				_full_scan(1);
 		}
 	} else
--- a/lib/device/dev-ext-udev-constants.h
+++ b/lib/device/dev-ext-udev-constants.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2015 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/*************************************************************************
+ *  Properties saved in udev db, accessible via libudev and used by LVM  *
+ *************************************************************************/
+
+/*
+ * DEV_EXT_UDEV_BLKID_TYPE property with various DEV_EXT_UDEV_BLKID_TYPE_*
+ * values that is saved in udev db via blkid call in udev rules
+ */
+#define DEV_EXT_UDEV_BLKID_TYPE                 "ID_FS_TYPE"
+/*
+ * mpath_member is forced by multipath - it's set in udev db via
+ * multipath call overwriting any existing ID_FS_TYPE value for
+ * a device which is a multipath component which prevents incorrect
+ * claim of the device by any other block device subsystem
+ */
+#define DEV_EXT_UDEV_BLKID_TYPE_MPATH           "mpath_member"
+/* FW RAIDs are all *_raid_member types except linux_raid_member which denotes SW RAID */
+#define DEV_EXT_UDEV_BLKID_TYPE_RAID_SUFFIX     "_raid_member"
+#define DEV_EXT_UDEV_BLKID_TYPE_SW_RAID         "linux_raid_member"
+#define DEV_EXT_UDEV_BLKID_PART_TABLE_TYPE      "ID_PART_TABLE_TYPE"
+#define DEV_EXT_UDEV_BLKID_PART_ENTRY_DISK      "ID_PART_ENTRY_DISK"
+
+/*
+ * DEV_EXT_UDEV_MPATH_DEVICE_PATH is set by multipath in udev db
+ * with value either 0 or 1. The same functionality as
+ * DEV_EXT_UDEV_BLKID_TYPE_MPATH actually, but introduced later
+ * for some reason.
+ */
+#define DEV_EXT_UDEV_MPATH_DEVICE_PATH          "DM_MULTIPATH_DEVICE_PATH"
+
+
+/***********************************************************
+ * Sysfs attributes accessible via libudev and used by LVM *
+ ***********************************************************/
+
+/* the value of size sysfs attribute is size in 512-byte sectors */
+#define DEV_EXT_UDEV_SYSFS_ATTR_SIZE            "size"
+
--- a/lib/device/dev-ext.c
+++ b/lib/device/dev-ext.c
@@ -0,0 +1,164 @@
+/*
+ * Copyright (C) 2014 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "lib.h"
+#include "device.h"
+
+#ifdef UDEV_SYNC_SUPPORT
+#include <libudev.h>
+#endif
+
+struct ext_registry_item {
+	const char *name;
+	struct dev_ext *(* dev_ext_get) (struct device *dev);
+	int (*dev_ext_release) (struct device *dev);
+};
+
+#define EXT_REGISTER(id,name) [id] = { #name, &_dev_ext_get_ ## name, &_dev_ext_release_ ## name }
+
+/*
+ * DEV_EXT_NONE
+ */
+static struct dev_ext *_dev_ext_get_none(struct device *dev)
+{
+	dev->ext.handle = NULL;
+	return &dev->ext;
+}
+
+static int _dev_ext_release_none(struct device *dev)
+{
+	dev->ext.handle = NULL;
+	return 1;
+}
+
+/*
+ * DEV_EXT_UDEV
+ */
+static struct dev_ext *_dev_ext_get_udev(struct device *dev)
+{
+#ifdef UDEV_SYNC_SUPPORT
+	struct udev *udev;
+	struct udev_device *udev_device;
+
+	if (dev->ext.handle)
+		return &dev->ext;
+
+	if (!(udev = udev_get_library_context()))
+		return_NULL;
+
+	if (!(udev_device = udev_device_new_from_devnum(udev, 'b', dev->dev)))
+		return_NULL;
+
+	dev->ext.handle = (void *) udev_device;
+	return &dev->ext;
+#else
+	return NULL;
+#endif
+}
+
+static int _dev_ext_release_udev(struct device *dev)
+{
+#ifdef UDEV_SYNC_SUPPORT
+	if (!dev->ext.handle)
+		return 1;
+
+	/* udev_device_unref can't fail - it has no return value */
+	udev_device_unref((struct udev_device *) dev->ext.handle);
+	dev->ext.handle = NULL;
+	return 1;
+#else
+	return 0;
+#endif
+}
+
+static struct ext_registry_item _ext_registry[DEV_EXT_NUM] = {
+	EXT_REGISTER(DEV_EXT_NONE, none),
+	EXT_REGISTER(DEV_EXT_UDEV, udev)
+};
+
+const char *dev_ext_name(struct device *dev)
+{
+	return _ext_registry[dev->ext.src].name;
+}
+
+static const char *_ext_attached_msg = "External handle attached to device";
+
+struct dev_ext *dev_ext_get(struct device *dev)
+{
+	struct dev_ext *ext;
+	void *handle_ptr;
+
+	handle_ptr = dev->ext.handle;
+
+	if (!(ext = _ext_registry[dev->ext.src].dev_ext_get(dev)))
+		log_error("Failed to get external handle for device %s [%s].",
+			   dev_name(dev), dev_ext_name(dev));
+	else if (handle_ptr != dev->ext.handle)
+		log_debug_devs("%s %s [%s:%p]", _ext_attached_msg, dev_name(dev),
+				dev_ext_name(dev), dev->ext.handle);
+
+	return ext;
+}
+
+int dev_ext_release(struct device *dev)
+{
+	int r;
+	void *handle_ptr;
+
+	if (!dev->ext.enabled ||
+	    !dev->ext.handle)
+		return 1;
+
+	handle_ptr = dev->ext.handle;
+
+	if (!(r = _ext_registry[dev->ext.src].dev_ext_release(dev)))
+		log_error("Failed to release external handle for device %s [%s:%p].",
+			  dev_name(dev), dev_ext_name(dev), dev->ext.handle);
+	else
+		log_debug_devs("External handle detached from device %s [%s:%p]",
+				dev_name(dev), dev_ext_name(dev), handle_ptr);
+
+	return r;
+}
+
+int dev_ext_enable(struct device *dev, dev_ext_t src)
+{
+	if (dev->ext.enabled && (dev->ext.src != src) && !dev_ext_release(dev)) {
+		log_error("Failed to enable external handle for device %s [%s].",
+			   dev_name(dev), _ext_registry[src].name); 
+		return 0;
+	}
+
+	dev->ext.src = src;
+	dev->ext.enabled = 1;
+
+	return 1;
+}
+
+int dev_ext_disable(struct device *dev)
+{
+	if (!dev->ext.enabled)
+		return 1;
+
+	if (!dev_ext_release(dev)) {
+		log_error("Failed to disable external handle for device %s [%s].",
+			   dev_name(dev), dev_ext_name(dev));
+		return 0;
+	}
+
+	dev->ext.enabled = 0;
+	dev->ext.src = DEV_EXT_NONE;
+
+	return 1;
+}
--- a/lib/device/dev-io.c
+++ b/lib/device/dev-io.c
@@ -154,7 +154,7 @@ int dev_get_block_size(struct device *dev, unsigned int *physical_block_size, un
 		}
 		log_debug_devs("%s: physical block size is %u bytes", name, dev->phys_block_size);
 	}
-#elif BLKSSZGET
+#elif defined (BLKSSZGET)
 	/* if we can't get physical block size, just use logical block size instead */
 	if (dev->phys_block_size == -1) {
 		if (ioctl(dev_fd(dev), BLKSSZGET, &dev->phys_block_size) < 0) {
@@ -289,25 +289,22 @@ static int _dev_get_size_file(const struct device *dev, uint64_t *size)
 	return 1;
 }

-static int _dev_get_size_dev(const struct device *dev, uint64_t *size)
+static int _dev_get_size_dev(struct device *dev, uint64_t *size)
 {
-	int fd;
 	const char *name = dev_name(dev);

-	if ((fd = open(name, O_RDONLY)) < 0) {
-		log_sys_error("open", name);
-		return 0;
-	}
+	if (!dev_open_readonly(dev))
+		return_0;

-	if (ioctl(fd, BLKGETSIZE64, size) < 0) {
+	if (ioctl(dev_fd(dev), BLKGETSIZE64, size) < 0) {
 		log_sys_error("ioctl BLKGETSIZE64", name);
-		if (close(fd))
+		if (!dev_close(dev))
 			log_sys_error("close", name);
 		return 0;
 	}

 	*size >>= BLKSIZE_SHIFT;	/* Convert to sectors */
-	if (close(fd))
+	if (!dev_close(dev))
 		log_sys_error("close", name);

 	log_very_verbose("%s: size is %" PRIu64 " sectors", name, *size);
@@ -377,7 +374,7 @@ static int _dev_discard_blocks(struct device *dev, uint64_t offset_bytes, uint64
 * Public functions
 *---------------------------------------------------------------*/

-int dev_get_size(const struct device *dev, uint64_t *size)
+int dev_get_size(struct device *dev, uint64_t *size)
 {
 	if (!dev)
 		return 0;
--- a/lib/device/dev-md.c
+++ b/lib/device/dev-md.c
@@ -15,8 +15,11 @@

 #include "lib.h"
 #include "dev-type.h"
-#include "metadata.h"
 #include "xlate.h"
+#ifdef UDEV_SYNC_SUPPORT
+#include <libudev.h> /* for MD detection using udev db records */
+#include "dev-ext-udev-constants.h"
+#endif

 #ifdef __linux__

@@ -82,10 +85,31 @@ static uint64_t _v1_sb_offset(uint64_t size, md_minor_version_t minor_version)
 	return sb_offset;
 }

+#ifdef UDEV_SYNC_SUPPORT
+/* Returns 1 when dev's udev blkid type property equals DEV_EXT_UDEV_BLKID_TYPE_SW_RAID, otherwise 0. */
+static int _udev_dev_is_md(struct device *dev)
+{
+	struct dev_ext *ext = dev_ext_get(dev);
+	const char *blkid_type;
+
+	if (!ext)
+		return_0;
+
+	blkid_type = udev_device_get_property_value((struct udev_device *)ext->handle,
+						     DEV_EXT_UDEV_BLKID_TYPE);
+	return blkid_type ? !strcmp(blkid_type, DEV_EXT_UDEV_BLKID_TYPE_SW_RAID) : 0;
+}
+#else
+static int _udev_dev_is_md(struct device *dev)
+{
+	return 0;
+}
+#endif
+
 /*
 * Returns -1 on error
 */
-int dev_is_md(struct device *dev, uint64_t *offset_found)
+static int _native_dev_is_md(struct device *dev, uint64_t *offset_found)
 {
 	int ret = 1;
 	md_minor_version_t minor;
@@ -130,6 +154,27 @@ out:
 	return ret;
 }

+/* Returns -1 if there is no MD detection hook for dev's external info source. */
+int dev_is_md(struct device *dev, uint64_t *offset_found)
+{
+	/*
+	 * The udev db does not hold the superblock offset, so a
+	 * non-native device status source is used only when the
+	 * caller does not request offset_found.
+	 */
+	if ((dev->ext.src == DEV_EXT_NONE) || offset_found)
+		return _native_dev_is_md(dev, offset_found);
+
+	if (dev->ext.src != DEV_EXT_UDEV) {
+		log_error(INTERNAL_ERROR "Missing hook for MD device recognition "
+			  "using external device info source %s", dev_ext_name(dev));
+
+		return -1;
+	}
+
+	return _udev_dev_is_md(dev);
+}
+
 static int _md_sysfs_attribute_snprintf(char *path, size_t size,
 					struct dev_types *dt,
 					struct device *blkdev,
--- a/lib/device/dev-type.c
+++ b/lib/device/dev-type.c
@@ -25,6 +25,11 @@
 #include <blkid.h>
 #endif

+#ifdef UDEV_SYNC_SUPPORT
+#include <libudev.h>
+#include "dev-ext-udev-constants.h"
+#endif
+
 #include "device-types.h"

 struct dev_types *create_dev_types(const char *proc_dir,
@@ -112,6 +117,10 @@ struct dev_types *create_dev_types(const char *proc_dir,
 		if (!strncmp("drbd", line + i, 4) && isspace(*(line + i + 4)))
 			dt->drbd_major = line_maj;

+		/* Look for DASD */
+		if (!strncmp("dasd", line + i, 4) && isspace(*(line + i + 4)))
+			dt->dasd_major = line_maj;
+
 		/* Look for EMC powerpath */
 		if (!strncmp("emcpower", line + i, 8) && isspace(*(line + i + 8)))
 			dt->emcpower_major = line_maj;
@@ -216,12 +225,18 @@ int dev_subsystem_part_major(struct dev_types *dt, struct device *dev)

 const char *dev_subsystem_name(struct dev_types *dt, struct device *dev)
 {
+	if (MAJOR(dev->dev) == dt->device_mapper_major)
+		return "DM";
+
 	if (MAJOR(dev->dev) == dt->md_major)
 		return "MD";

 	if (MAJOR(dev->dev) == dt->drbd_major)
 		return "DRBD";

+	if (MAJOR(dev->dev) == dt->dasd_major)
+		return "DASD";
+
 	if (MAJOR(dev->dev) == dt->emcpower_major)
 		return "EMCPOWER";

@@ -272,6 +287,9 @@ static int _is_partitionable(struct dev_types *dt, struct device *dev)
 {
 	int parts = major_max_partitions(dt, MAJOR(dev->dev));

+	if (MAJOR(dev->dev) == dt->device_mapper_major)
+		return 1;
+
 	/* All MD devices are partitionable via blkext (as of 2.6.28) */
 	if (MAJOR(dev->dev) == dt->md_major)
 		return 1;
@@ -314,12 +332,66 @@ static int _has_partition_table(struct device *dev)
 	return ret;
 }

-int dev_is_partitioned(struct dev_types *dt, struct device *dev)
+#ifdef UDEV_SYNC_SUPPORT
+static int _udev_dev_is_partitioned(struct device *dev)
 {
+	struct dev_ext *ext = dev_ext_get(dev);
+	struct udev_device *udev_dev;
+
+	if (!ext)
+		return_0;
+
+	udev_dev = (struct udev_device *)ext->handle;
+	/* Partitioned only if the PART_TABLE_TYPE property is set and PART_ENTRY_DISK is not. */
+	if (!udev_device_get_property_value(udev_dev, DEV_EXT_UDEV_BLKID_PART_TABLE_TYPE) ||
+	    udev_device_get_property_value(udev_dev, DEV_EXT_UDEV_BLKID_PART_ENTRY_DISK))
+		return 0;
+
+	return 1;
+}
+#else
+static int _udev_dev_is_partitioned(struct device *dev)
+{
+	return 0;
+}
+#endif
+
+static int _native_dev_is_partitioned(struct dev_types *dt, struct device *dev)
+{
+	int r;
+
 	if (!_is_partitionable(dt, dev))
 		return 0;

-	return _has_partition_table(dev);
+	/* Unpartitioned DASD devices are not supported. */
+	if (MAJOR(dev->dev) == dt->dasd_major)
+		return 1;
+
+	if (!dev_open_readonly_quiet(dev)) {
+		log_debug_devs("%s: failed to open device, considering device "
+			       "is partitioned", dev_name(dev));
+		return 1;
+	}
+
+	r = _has_partition_table(dev);
+
+	if (!dev_close(dev))
+		stack;
+
+	return r;
+}
+
+/* Dispatch partition table detection on dev->ext.src; returns 0 if no hook exists for that source. */
+int dev_is_partitioned(struct dev_types *dt, struct device *dev)
+{
+	if (dev->ext.src == DEV_EXT_NONE)
+		return _native_dev_is_partitioned(dt, dev);
+
+	if (dev->ext.src != DEV_EXT_UDEV) {
+		log_error(INTERNAL_ERROR "Missing hook for partition table recognition "
+			  "using external device info source %s", dev_ext_name(dev));
+		return 0;
+	}
+	return _udev_dev_is_partitioned(dev);
 }

 /*
@@ -361,7 +433,7 @@ int dev_get_primary_dev(struct dev_types *dt, struct device *dev, dev_t *result)
 	 */
 	if ((parts = dt->dev_type_array[major].max_partitions) > 1) {
 		if ((residue = minor % parts)) {
-			*result = MKDEV((dev_t)major, (minor - residue));
+			*result = MKDEV((dev_t)major, (dev_t)(minor - residue));
 			ret = 2;
 		} else {
 			*result = dev->dev;
@@ -438,7 +510,7 @@ int dev_get_primary_dev(struct dev_types *dt, struct device *dev, dev_t *result)
 			  path, buffer);
 		goto out;
 	}
-	*result = MKDEV((dev_t)major, minor);
+	*result = MKDEV((dev_t)major, (dev_t)minor);
 	ret = 2;
 out:
 	if (fp && fclose(fp))
@@ -456,13 +528,15 @@ static inline int _type_in_flag_list(const char *type, uint32_t flag_list)
 		((flag_list & TYPE_DM_SNAPSHOT_COW) && !strcmp(type, "DM_snapshot_cow")));
 }

+#define MSG_FAILED_SIG_OFFSET "Failed to get offset of the %s signature on %s."
+#define MSG_FAILED_SIG_LENGTH "Failed to get length of the %s signature on %s."
+#define MSG_WIPING_SKIPPED " Wiping skipped."
+
 static int _blkid_wipe(blkid_probe probe, struct device *dev, const char *name,
 		       uint32_t types_to_exclude, uint32_t types_no_prompt,
 		       int yes, force_t force)
 {
-	static const char const _msg_failed_offset[] = "Failed to get offset of the %s signature on %s.";
-	static const char const _msg_failed_length[] = "Failed to get length of the %s signature on %s.";
-	static const char const _msg_wiping[] = "Wiping %s signature on %s.";
+	static const char _msg_wiping[] = "Wiping %s signature on %s.";
 	const char *offset = NULL, *type = NULL, *magic = NULL,
 		   *usage = NULL, *label = NULL, *uuid = NULL;
 	loff_t offset_value;
@@ -470,23 +544,43 @@ static int _blkid_wipe(blkid_probe probe, struct device *dev, const char *name,

 	if (!blkid_probe_lookup_value(probe, "TYPE", &type, NULL)) {
 		if (_type_in_flag_list(type, types_to_exclude))
-			return 1;
+			return 2;
 		if (blkid_probe_lookup_value(probe, "SBMAGIC_OFFSET", &offset, NULL)) {
-			log_error(_msg_failed_offset, type, name);
-			return 0;
+			if (force < DONT_PROMPT) {
+				log_error(MSG_FAILED_SIG_OFFSET, type, name);
+				return 0;
+			} else { /* forced: warn and skip wiping instead of raising an error */
+				log_warn("WARNING: " MSG_FAILED_SIG_OFFSET MSG_WIPING_SKIPPED, type, name);
+				return 2;
+			}
 		}
 		if (blkid_probe_lookup_value(probe, "SBMAGIC", &magic, &len)) {
-			log_error(_msg_failed_length, type, name);
-			return 0;
+			if (force < DONT_PROMPT) {
+				log_error(MSG_FAILED_SIG_LENGTH, type, name);
+				return 0;
+			} else {
+				log_warn("WARNING: " MSG_FAILED_SIG_LENGTH MSG_WIPING_SKIPPED, type, name);
+				return 2;
+			}
 		}
 	} else if (!blkid_probe_lookup_value(probe, "PTTYPE", &type, NULL)) {
 		if (blkid_probe_lookup_value(probe, "PTMAGIC_OFFSET", &offset, NULL)) {
-			log_error(_msg_failed_offset, type, name);
-			return 0;
+			if (force < DONT_PROMPT) {
+				log_error(MSG_FAILED_SIG_OFFSET, type, name);
+				return 0;
+			} else {
+				log_warn("WARNING: " MSG_FAILED_SIG_OFFSET MSG_WIPING_SKIPPED, type, name);
+				return 2;
+			}
 		}
 		if (blkid_probe_lookup_value(probe, "PTMAGIC", &magic, &len)) {
-			log_error(_msg_failed_length, type, name);
-			return 0;
+			if (force < DONT_PROMPT) {
+				log_error(MSG_FAILED_SIG_LENGTH, type, name);
+				return 0;
+			} else {
+				log_warn("WARNING: " MSG_FAILED_SIG_LENGTH MSG_WIPING_SKIPPED, type, name);
+				return 2;
+			}
 		}
 		usage = "partition table";
 	} else
@@ -526,12 +620,17 @@ static int _blkid_wipe(blkid_probe probe, struct device *dev, const char *name,
 static int _wipe_known_signatures_with_blkid(struct device *dev, const char *name,
 					     uint32_t types_to_exclude,
 					     uint32_t types_no_prompt,
-					     int yes, force_t force)
+					     int yes, force_t force, int *wiped)
 {
 	blkid_probe probe = NULL;
-	int found = 0, wiped = 0, left = 0;
+	int found = 0, left = 0, wiped_tmp;
+	int r_wipe;
 	int r = 0;

+	if (!wiped)
+		wiped = &wiped_tmp;
+	*wiped = 0;
+
 	/* TODO: Should we check for valid dev - _dev_is_valid(dev)? */

 	if (!(probe = blkid_new_probe_from_filename(dev_name(dev)))) {
@@ -552,15 +651,17 @@ static int _wipe_known_signatures_with_blkid(struct device *dev, const char *nam
 						 BLKID_SUBLKS_BADCSUM);

 	while (!blkid_do_probe(probe)) {
-		found++;
-		if (_blkid_wipe(probe, dev, name, types_to_exclude, types_no_prompt, yes, force))
-			wiped++;
+		if ((r_wipe = _blkid_wipe(probe, dev, name, types_to_exclude, types_no_prompt, yes, force)) == 1)
+			(*wiped)++;
+		/* do not count excluded types */
+		if (r_wipe != 2)
+			found++;
 	}

 	if (!found)
 		r = 1;

-	left = found - wiped;
+	left = found - *wiped;
 	if (!left)
 		r = 1;
 	else
@@ -575,7 +676,7 @@ out:
 #endif /* BLKID_WIPING_SUPPORT */

 static int _wipe_signature(struct device *dev, const char *type, const char *name,
-			   int wipe_len, int yes, force_t force,
+			   int wipe_len, int yes, force_t force, int *wiped,
 			   int (*signature_detection_fn)(struct device *dev, uint64_t *offset_found))
 {
 	int wipe;
@@ -605,17 +706,24 @@ static int _wipe_signature(struct device *dev, const char *type, const char *nam
 		return 0;
 	}

+	(*wiped)++;
 	return 1;
 }

 static int _wipe_known_signatures_with_lvm(struct device *dev, const char *name,
 					   uint32_t types_to_exclude __attribute__((unused)),
 					   uint32_t types_no_prompt __attribute__((unused)),
-					   int yes, force_t force)
+					   int yes, force_t force, int *wiped)
 {
-	if (!_wipe_signature(dev, "software RAID md superblock", name, 4, yes, force, dev_is_md) ||
-	    !_wipe_signature(dev, "swap signature", name, 10, yes, force, dev_is_swap) ||
-	    !_wipe_signature(dev, "LUKS signature", name, 8, yes, force, dev_is_luks))
+	int wiped_tmp;
+
+	if (!wiped)
+		wiped = &wiped_tmp;
+	*wiped = 0;
+
+	if (!_wipe_signature(dev, "software RAID md superblock", name, 4, yes, force, wiped, dev_is_md) ||
+	    !_wipe_signature(dev, "swap signature", name, 10, yes, force, wiped, dev_is_swap) ||
+	    !_wipe_signature(dev, "LUKS signature", name, 8, yes, force, wiped, dev_is_luks))
 		return 0;

 	return 1;
@@ -623,19 +731,20 @@ static int _wipe_known_signatures_with_lvm(struct device *dev, const char *name,

 int wipe_known_signatures(struct cmd_context *cmd, struct device *dev,
 			  const char *name, uint32_t types_to_exclude,
-			  uint32_t types_no_prompt, int yes, force_t force)
+			  uint32_t types_no_prompt, int yes, force_t force,
+			  int *wiped)
 {
 #ifdef BLKID_WIPING_SUPPORT
 	if (find_config_tree_bool(cmd, allocation_use_blkid_wiping_CFG, NULL))
 		return _wipe_known_signatures_with_blkid(dev, name,
 							 types_to_exclude,
 							 types_no_prompt,
-							 yes, force);
+							 yes, force, wiped);
 #endif
 	return _wipe_known_signatures_with_lvm(dev, name,
 					       types_to_exclude,
 					       types_no_prompt,
-					       yes, force);
+					       yes, force, wiped);
 }

 #ifdef __linux__
@@ -655,23 +764,25 @@ static int _snprintf_attr(char *buf, size_t buf_size, const char *sysfs_dir,

 static unsigned long _dev_topology_attribute(struct dev_types *dt,
 					     const char *attribute,
-					     struct device *dev)
+					     struct device *dev,
+					     unsigned long default_value)
 {
 	const char *sysfs_dir = dm_sysfs_dir();
 	char path[PATH_MAX], buffer[64];
 	FILE *fp;
 	struct stat info;
 	dev_t uninitialized_var(primary);
-	unsigned long result = 0UL;
+	unsigned long result = default_value;
+	unsigned long value = 0UL;

 	if (!attribute || !*attribute)
-		return_0;
+		goto_out;

 	if (!sysfs_dir || !*sysfs_dir)
-		return_0;
+		goto_out;

 	if (!_snprintf_attr(path, sizeof(path), sysfs_dir, attribute, dev->dev))
-                return_0;
+                goto_out;

 	/*
 	 * check if the desired sysfs attribute exists
@@ -681,72 +792,79 @@ static unsigned long _dev_topology_attribute(struct dev_types *dt,
 	if (stat(path, &info) == -1) {
 		if (errno != ENOENT) {
 			log_sys_debug("stat", path);
-			return 0;
+			goto out;
 		}
 		if (!dev_get_primary_dev(dt, dev, &primary))
-			return 0;
+			goto out;

 		/* get attribute from partition's primary device */
 		if (!_snprintf_attr(path, sizeof(path), sysfs_dir, attribute, primary))
-			return_0;
+			goto_out;

 		if (stat(path, &info) == -1) {
 			if (errno != ENOENT)
 				log_sys_debug("stat", path);
-			return 0;
+			goto out;
 		}
 	}

 	if (!(fp = fopen(path, "r"))) {
 		log_sys_debug("fopen", path);
-		return 0;
+		goto out;
 	}

 	if (!fgets(buffer, sizeof(buffer), fp)) {
 		log_sys_debug("fgets", path);
-		goto out;
+		goto out_close;
 	}

-	if (sscanf(buffer, "%lu", &result) != 1) {
+	if (sscanf(buffer, "%lu", &value) != 1) {
 		log_warn("sysfs file %s not in expected format: %s", path, buffer);
-		goto out;
+		goto out_close;
 	}

-	log_very_verbose("Device %s %s is %lu bytes.",
-			 dev_name(dev), attribute, result);
+	log_very_verbose("Device %s: %s is %lu%s.",
+			 dev_name(dev), attribute, value, default_value ? "" : " bytes");

-out:
+	/* Only byte-valued attributes (default_value == 0) are converted to sectors. */
+	result = default_value ? value : (value >> SECTOR_SHIFT);
+out_close:
 	if (fclose(fp))
 		log_sys_debug("fclose", path);

-	return result >> SECTOR_SHIFT;
+out:
+	return result;
 }

 unsigned long dev_alignment_offset(struct dev_types *dt, struct device *dev)
 {
-	return _dev_topology_attribute(dt, "alignment_offset", dev);
+	return _dev_topology_attribute(dt, "alignment_offset", dev, 0UL);
 }

 unsigned long dev_minimum_io_size(struct dev_types *dt, struct device *dev)
 {
-	return _dev_topology_attribute(dt, "queue/minimum_io_size", dev);
+	return _dev_topology_attribute(dt, "queue/minimum_io_size", dev, 0UL);
 }

 unsigned long dev_optimal_io_size(struct dev_types *dt, struct device *dev)
 {
-	return _dev_topology_attribute(dt, "queue/optimal_io_size", dev);
+	return _dev_topology_attribute(dt, "queue/optimal_io_size", dev, 0UL);
 }

 unsigned long dev_discard_max_bytes(struct dev_types *dt, struct device *dev)
 {
-	return _dev_topology_attribute(dt, "queue/discard_max_bytes", dev);
+	return _dev_topology_attribute(dt, "queue/discard_max_bytes", dev, 0UL);
 }

 unsigned long dev_discard_granularity(struct dev_types *dt, struct device *dev)
 {
-	return _dev_topology_attribute(dt, "queue/discard_granularity", dev);
+	return _dev_topology_attribute(dt, "queue/discard_granularity", dev, 0UL);
 }

+int dev_is_rotational(struct dev_types *dt, struct device *dev)
+{
+	return (int) _dev_topology_attribute(dt, "queue/rotational", dev, 1UL);
+}
 #else

 int dev_get_primary_dev(struct dev_types *dt, struct device *dev, dev_t *result)
@@ -779,4 +897,8 @@ unsigned long dev_discard_granularity(struct dev_types *dt, struct device *dev)
 	return 0UL;
 }

+int dev_is_rotational(struct dev_types *dt, struct device *dev)
+{
+	return 1;
+}
 #endif
--- a/lib/device/dev-type.h
+++ b/lib/device/dev-type.h
@@ -44,6 +44,7 @@ struct dev_types {
 	int device_mapper_major;
 	int emcpower_major;
 	int power2_major;
+	int dasd_major;
 	struct dev_type_def dev_type_array[NUMBER_OF_MAJORS];
 };

@@ -65,7 +66,7 @@ int dev_is_luks(struct device *dev, uint64_t *signature);
 #define TYPE_DM_SNAPSHOT_COW	0x004
 int wipe_known_signatures(struct cmd_context *cmd, struct device *dev, const char *name,
 			  uint32_t types_to_exclude, uint32_t types_no_prompt,
-			  int yes, force_t force);
+			  int yes, force_t force, int *wiped);

 /* Type-specific device properties */
 unsigned long dev_md_stripe_width(struct dev_types *dt, struct device *dev);
@@ -82,4 +83,6 @@ unsigned long dev_optimal_io_size(struct dev_types *dt, struct device *dev);
 unsigned long dev_discard_max_bytes(struct dev_types *dt, struct device *dev);
 unsigned long dev_discard_granularity(struct dev_types *dt, struct device *dev);

+int dev_is_rotational(struct dev_types *dt, struct device *dev);
+
 #endif
--- a/Show More
+++ b/Show More