dmstats: add 'interval' and 'interval_ns' report fields

Add a pair of fields to expose the current per-interval duration estimate. The 'interval' field provides a real value in units of seconds and the 'interval_ns' field provides the same quantity expressed as a whole number of nanoseconds.
dmstats: do not use "region_id" in error messages
2025-10-05 07:33:15 +03:00 · 2015-08-13 19:05:53 +01:00 · 2015-08-13 19:05:48 +01:00 · 2015-08-13 19:05:46 +01:00 · 2015-08-13 19:05:45 +01:00 · 2015-08-13 19:05:45 +01:00
507 changed files with 44497 additions and 7061 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,13 +1,16 @@
 *.5
+*.7
 *.8
 *.a
 *.d
 *.o
+*.orig
 *.pc
 *.pot
+*.rej
 *.so
 *.so.*
-*.swp
+*.sw*
 *~

 .export.sym
@@ -17,11 +20,11 @@
 Makefile
 make.tmpl

-configure.h
-version.h
-
 /autom4te.cache/
+/autoscan.log
 /config.log
 /config.status
+/configure.scan
 /cscope.out
+/tags
 /tmp/
--- a/Makefile.in
+++ b/Makefile.in
@@ -1,6 +1,6 @@
 #
 # Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
-# Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved.
+# Copyright (C) 2004-2015 Red Hat, Inc. All rights reserved.
 #
 # This file is part of LVM2.
 #
@@ -15,6 +15,8 @@
 srcdir = @srcdir@
 top_srcdir = @top_srcdir@
 top_builddir = @top_builddir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@

 SUBDIRS = conf daemons include lib libdaemon libdm man scripts tools

@@ -91,10 +93,44 @@ cscope.out:
 all: cscope.out
 endif
 DISTCLEAN_TARGETS += cscope.out
+CLEAN_DIRS += autom4te.cache

-check check_system check_cluster check_local check_lvmetad unit: all
+check check_system check_cluster check_local check_lvmetad check_lvmpolld unit: all
 	$(MAKE) -C test $(@)

+conf.generate: tools
+
+# how to use parenthesis in makefiles
+leftparen:=(
+LVM_VER := $(firstword $(subst $(leftparen), ,$(LVM_VERSION)))
+VER := LVM2.$(LVM_VER)
+# release file name
+FILE_VER := $(VER).tgz
+CLEAN_TARGETS += $(FILE_VER)
+CLEAN_DIRS += $(rpmbuilddir)
+
+dist:
+	@echo "Generating $(FILE_VER)";\
+	(cd $(top_srcdir); git ls-tree -r HEAD --name-only | xargs tar --transform "s,^,$(VER)/," -c) | gzip >$(FILE_VER)
+
+rpm: dist
+	$(RM) -r $(rpmbuilddir)/SOURCES
+	$(MKDIR_P) $(rpmbuilddir)/SOURCES
+	$(LN_S) -f $(abs_top_builddir)/$(FILE_VER) $(rpmbuilddir)/SOURCES
+	$(LN_S) -f $(abs_top_srcdir)/spec/build.inc $(rpmbuilddir)/SOURCES
+	$(LN_S) -f $(abs_top_srcdir)/spec/macros.inc $(rpmbuilddir)/SOURCES
+	$(LN_S) -f $(abs_top_srcdir)/spec/packages.inc $(rpmbuilddir)/SOURCES
+	DM_VER=$$(cut -d- -f1 $(top_srcdir)/VERSION_DM);\
+	GIT_VER=$$(cd $(top_srcdir); git describe | cut -d- --output-delimiter=. -f2,3 || echo 0);\
+	sed -e "s,\(device_mapper_version\) [0-9.]*$$,\1 $$DM_VER," \
+	    -e "s,^\(Version:[^0-9%]*\)[0-9.]*$$,\1 $(LVM_VER)," \
+	    -e "s,^\(Release:[^0-9%]*\)[0-9.]\+,\1 $$GIT_VER," \
+	    $(top_srcdir)/spec/source.inc >$(rpmbuilddir)/SOURCES/source.inc
+	rpmbuild -v --define "_topdir $(rpmbuilddir)" -ba $(top_srcdir)/spec/lvm2.spec
+
+generate: conf.generate
+	$(MAKE) -C conf generate
+
 install_system_dirs:
 	$(INSTALL_DIR) $(DESTDIR)$(DEFAULT_SYS_DIR)
 	$(INSTALL_ROOT_DIR) $(DESTDIR)$(DEFAULT_ARCHIVE_DIR)
@@ -155,7 +191,7 @@ lcov: $(LCOV_TRACES)
 	$(RM) -r $(LCOV_REPORTS_DIR)
 	$(MKDIR_P) $(LCOV_REPORTS_DIR)
 	for i in $(LCOV_TRACES); do \
-		test -s $$i && lc="$$lc $$i"; \
+		test -s $$i -a $$(wc -w <$$i) -ge 100 && lc="$$lc $$i"; \
 	done; \
 	test -z "$$lc" || $(GENHTML) -p @abs_top_builddir@ \
 		-o $(LCOV_REPORTS_DIR) $$lc
@@ -187,3 +223,13 @@ memcheck: test-programs
 ruby-test:
 	$(RUBY) report-generators/test/ts.rb
 endif
+
+ifneq ($(shell which ctags),)
+.PHONY: tags
+all: tags
+tags:
+	test -z "$(shell find $(top_srcdir) -type f -name '*.[ch]' -newer tags | head -1)" || $(RM) tags
+	test -f tags || find $(top_srcdir) -maxdepth 4 -type f -name '*.[ch]' -exec ctags -a '{}' +
+
+DISTCLEAN_TARGETS += tags
+endif
--- a/2
+++ b/2
@@ -18,7 +18,7 @@ Mailing list for general discussion related to LVM2:

 Mailing lists for LVM2 development, patches and commits:
  lvm-devel@redhat.com
-  Subscribe from https://www.redhat.com/mailman/listinfo/linux-lvm
+  Subscribe from https://www.redhat.com/mailman/listinfo/lvm-devel

  lvm2-commits@lists.fedorahosted.org (Read-only archive of commits)
  Subscribe from https://fedorahosted.org/mailman/listinfo/lvm2-commits
--- a/2
+++ b/2
@@ -1 +1 @@
-2.02.118(2)-git (2015-03-24)
+2.02.128(2)-git (2015-08-10)
--- a/2
+++ b/2
@@ -1 +1 @@
-1.02.95-git (2015-03-24)
+1.02.105-git (2015-08-10)
--- a/163
+++ b/163
@@ -1,3 +1,166 @@
+Version 2.02.128 - 
+===================================
+  Check for valid cache mode in validation of cache segment.
+  Enhance internal API cache_set_mode() and cache_set_policy().
+  Enhance toollib's get_cache_params().
+  Runtime detect presence of cache smq policy.
+  Add demo cache-mq and cache-smq profiles.
+  Add cmd profilable allocation/cache_policy,cache_settings,cache_mode.
+  Require cache_check 0.5.4 for use of --clear-needs-check-flag.
+  Fix lvmetad udev rules to not override SYSTEMD_WANTS, add the service instead.
+
+Version 2.02.127 - 10th August 2015
+===================================
+  Do not init filters, locking, lvmetad, lvmpolld if command doesn't use it.
+  Order fields in struct cmd_context more logically.
+  Add lock_type to lvmcache VG summary and info structs.
+  Fix regression in cache causing some PVs to bypass filters (2.02.105).
+  Make configure --enable-realtime the default now.
+  Update .gitignore and configure.in files to reflect usage of current tree.
+
+Version 2.02.126 - 24th July 2015
+=================================
+  Fix long option hyphen removal. (2.02.122)
+  Fix clvmd freeze if client disappears without first releasing its locks.
+  Fix lvconvert segfaults while performing snapshots merge.
+  Ignore errors during detection if use_blkid_wiping=1 and --force is used.
+  Recognise DM_ABORT_ON_INTERNAL_ERRORS env var override in lvm logging fn.
+  Fix alloc segfault when extending LV with fewer stripes than in first seg.
+  Fix handling of cache policy name.
+  Set cache policy before with the first lvm2 cache pool metadata commit.
+  Fix detection of thin-pool overprovisioning (2.02.124).
+  Fix lvmpolld segfaults on 32 bit architectures.
+  Add lvmlockd lock_args validation to vg_validate.
+  Fix ignored --startstopservices option if running lvmconf with systemd.
+  Hide sanlock LVs when processing LVs in VG unless named or --all used.
+
+Version 2.02.125 - 7th July 2015
+================================
+  Fix getline memory usage in lvmpolld.
+  Add support --clear-needs-check-flag for cache_check of cache pool metadata.
+  Add lvmetactl for developer use only.
+  Rename global/lock_retries to lvmlockd_retries.
+  Replace --enable-lvmlockd by --enable-lockd-sanlock and --enable-lockd-dlm.
+
+Version 2.02.124 - 3rd July 2015
+================================
+  Move sending thin pool messages from resume to suspend phase.
+  Report warning when pool is overprovisioned and not auto resized.
+  Recognize free-form date/time values for lv_time field in selection criteria.
+  Added experimental lvmlockd with configure --enable-lvmlockd.
+  Fix regression in select to match string fields if using synonyms (2.02.123).
+  Fix regression when printing more lv names via display_lvname (2.02.122).
+  Add missing error logging to unlock_vg and sync_local_dev_names callers.
+
+Version 2.02.123 - 30th June 2015
+=================================
+  Add report/time_format lvm.conf option to define time format for report.
+  Fix makefile shell compare == when building lvmetad lvmpolld (2.02.120).
+  Add --type full to lvmconfig for full configuration tree view.
+  Add undocumented environment variables to lvm man page. (2.02.119)
+  Add device synchronization point before activating a new snapshot.
+  Add --withspaces to lvmconfig to add spaces in output for better readability.
+  Add custom main function to libdaemon.
+  Use lvmetad to track out-of-date metadata discovered.
+
+Version 2.02.122 - 20th June 2015
+=================================
+  Flush stdout before printing to stderr.
+  Use pre-allocated buffer for printed LV names in display_lvname.
+  Support thins with size of external origin unaligned with thin pool chunk.
+  Allow extension of reduced thin volumes with external origins.
+  Consider snapshot and origin LV as unusable if component devices suspended.
+  Fix lvmconfig segfault on settings with undefined default value (2.02.120).
+  Add explicit 's' (shared) LV activation mode.
+  Ignore hyphens in long options names (i.e. --long-option == --longoption).
+
+Version 2.02.121 - 12th June 2015
+=================================
+  Distinguish between on-disk and lvmetad versions of text metadata.
+  Remove DL_LIBS from Makefiles for daemons that don't need them.
+  Zero errno before strtoul call in dmsetup if tested after the call.
+  Zero errno before strtoul call in lvmpolld.
+  Fix a segfault in pvscan --cache --background command.
+  Fix test for AREA_PV when checking for failed mirrors.
+  Do not use --sysinit in lvm2-activation{-early,-net}.service if lvmpolld used.
+  Maintain outdated PV info in lvmetad till all old metadata is gone from disk.
+  Do not fail polling when poll LV not found (already finished or removed).
+  Replace poll_get_copy_vg/lv fns with vg_read() and find_lv() in polldaemon.
+  Close all device fds only before sleep call in polldaemon.
+  Simplify Makefile targets that generate exported symbols.
+  Move various -D settings from Makefiles to configure.h.
+
+Version 2.02.120 - 15th May 2015
+================================
+  Make various adjustments to Makefile compilation flags.
+  Add lvmpolld debug message class.
+  Add lvmpolld client mode for querying running server instance for status info.
+  Fix some libdaemon socket creation and reuse error paths.
+  Daemons (libdaemon) support exit on idle also in non-systemd environment.
+  Provide make dist and make rpm targets
+  Configure lvm.conf for use_lvmetad and use_lvmpolld.
+  Add lvpoll for cmdline communication with lvmpolld.
+  Add lvmpolld acting as a free-standing version of polldaemon.
+  Avoid repeated identical lvmetad VG lookups in commands processing all VGs.
+  Handle switches to alternative duplicate PVs efficiently with lvmetad.
+  Properly validate PV size for pvcreate --restorefile.
+  Fix check if pvcreate wiped device (2.02.117).
+  Fix storing of vgid when caching metadata (2.02.118).
+  Fix recursive lvm-config man page. (2.02.119)
+  Refactor polldaemon interfaces to poll every operation by VG/LV couple
+  Skip wait after testing in _wait_for_single_lv when polling finished
+  Return 'None' in python for empty string properties instead of crashing.
+  Distinguish signed numerical property type in reports for lvm2app library.
+  Reread raid completion status immediately when progress appears to be zero.
+  lvm2app closes locking on lvm_quit().
+  Configure detects /run or /var/run.
+  Add missing newline in clvmd --help output.
+
+Version 2.02.119 - 2nd May 2015
+===============================
+  New LVM_LOG_FILE_EPOCH, LVM_EXPECTED_EXIT_STATUS env vars. Man page to follow.
+  Remove detailed content from lvm.conf man page: use lvmconfig instead.
+  Generate complete config files with lvmconfig or 'make generate'.
+  Also display info on deprecated config with lvmconfig --withcomments.
+  Display version since which config is deprecated in lvmconfig --withversions.
+  Add --showdeprecated to lvmconfig to also display deprecated settings.
+  Hide deprecated settings in lvmconfig output for all types but current,diff.
+  Introduce support for exit on idle feature in libdaemon
+  Add --showunsupported to lvmconfig to also display unsupported settings.
+  Display unsupported settings for lvmconfig --type current,diff only by default
+  Honour lvmconfig --ignoreunsupported and --ignoreadvanced for all --type.
+  Make python bindings usable with python3 (and compatible with 2.6 & 2.7).
+  Add lvmconfig -l|--list as shortcut for lvmconfig --type list --withsummary.
+  Add lvmconfig --type list to display plain list of configuration settings.
+  Introduce lvmconfig as the preferred form of 'lvm dumpconfig'.
+  Add lv_ancestors and lv_descendants reporting fields.
+  Add --ignorelocal option to dumpconfig to ignore the local section.
+  Close connection to lvmetad after fork.
+  Make lvchange able to resume background pvmove polling again.
+  Split pvmove update metadata fn in an initial one and a subsequent one.
+  Refactor shared pvmove and lvconvert code into new _poll files.
+  Add --unconfigured option to dumpconfig to print strings unconfigured.
+  Add --withsummary option to dumpconfig to print first line - summary comment.
+  Use number of device holders to help choose between duplicate PVs.
+  Try to make lvmetad and non-lvmetad duplicate PV handling as similar as poss.
+  Issue warnings about duplicate PVs discovered by lvmetad.
+  Track alternative devices with matching PVIDs in lvmetad.
+  Check for lvm binary in blkdeactivate and skip LVM processing if not present.
+  Add --enable-halvm and --disable-halvm options to lvmconf script.
+  Add --services, --mirrorservice and --startstopservices option to lvmconf.
+  Use proper default value of global/use_lvmetad when processing lvmconf script.
+  Respect allocation/cling_tag_list during initial contiguous allocation.
+  Add A_PARTITION_BY_TAGS set when allocated areas should not share tags.
+  Make changes persist with python addTag/removeTag.
+  Set correct vgid when updating cache when writing PV metadata.
+  More efficient clvmd singlenode locking emulation.
+  Reject lvcreate -m with raid4/5/6 to avoid unexpected layout.
+  Don't skip invalidation of cached orphans if vg write lck is held (2.02.118).
+  Log relevant PV tags when using cling allocation.
+  Add str_list_add_list() to combine two lists.
+  Fix LV processing with selection to always do the selection on initial state.
+  Add internal LV_REMOVED LV status flag.
+
 Version 2.02.118 - 23rd March 2015
 ==================================
  Store metadata size + checksum in lvmcache and add struct lvmcache_vgsummary.
--- a/83
+++ b/83
@@ -1,3 +1,86 @@
+Version 1.02.105 - 
+===================================
+  Add more arg validation for dm_tree_node_add_cache_target().
+  Add --alldevices switch to replace use of --force for stats create / delete.
+
+Version 1.02.104 - 10th August 2015
+===================================
+  Add dmstats.8 man page
+  Add dmstats --segments switch to create one region per device segment.
+  Add dmstats --regionid, --allregions to specify a single / all stats regions.
+  Add dmstats --allprograms for stats commands that filter by program ID.
+  Add dmstats --auxdata and --programid args to specify aux data and program ID.
+  Add report stats sub-command to provide repeating stats reports.
+  Add clear, delete, list, and print stats sub-commands.
+  Add create stats sub-command and --start, --length, --areas and --areasize.
+  Recognize 'dmstats' as an alias for 'dmsetup stats' when run with this name.
+  Add a 'stats' command to dmsetup to configure, manage and report stats data.
+  Add statistics fields to dmsetup -o.
+  Add libdm-stats library to allow management of device-mapper statistics.
+  Add --nosuffix to suppress dmsetup unit suffixes in report output.
+  Add --units to control dmsetup report field output units.
+  Add support to redisplay column headings for repeating column reports.
+  Fix report header and row resource leaks.
+  Report timestamps of ioctls with dmsetup -vvv.
+  Recognize report field name variants without any underscores too.
+  Add dmsetup --interval and --count to repeat reports at specified intervals.
+  Add dm_timestamp functions to libdevmapper.
+  Recognise vg/lv name format in dmsetup.
+  Move size display code to libdevmapper as dm_size_to_string.
+
+Version 1.02.103 - 24th July 2015
+=================================
+  Introduce libdevmapper wrappers for all malloc-related functions.
+
+Version 1.02.102 - 7th July 2015
+================================
+  Include tool.h for default non-library use.
+  Introduce format macros with embedded % such as FMTu64.
+
+Version 1.02.101 - 3rd July 2015
+================================
+  Add experimental support to passing messages in suspend tree.
+  Add dm_report_value_cache_{set,get} to support caching during report/select.
+  Add dm_report_reserved_handler to handle report reserved value actions.
+  Support dynamic value in select: DM_REPORT_FIELD_RESERVED_VALUE_DYNAMIC_VALUE.
+  Support fuzzy names in select: DM_REPORT_FIELD_RESERVED_VALUE_FUZZY_NAMES.
+  Thin pool trace messages show a device name and major:minor.
+
+Version 1.02.100 - 30th June 2015
+=================================
+  Add since, after, until and before time operators to be used in selection.
+  Add support for time in reports and selection: DM_REPORT_FIELD_TYPE_TIME.
+  Support report reserved value ranges: DM_REPORT_FIELD_RESERVED_VALUE_RANGE.
+  Support report reserved value names: DM_REPORT_FIELD_RESERVED_VALUE_NAMED.
+  Add DM_CONFIG_VALUE_FMT_{INT_OCTAL,STRING_NO_QUOTES} config value format flag.
+  Add DM_CONFIG_VALUE_FMT_COMMON_{ARRAY,EXTRA_SPACE} config value format flag.
+  Add dm_config_value_{get,set}_format_flags to get and set config value format.
+
+Version 1.02.99 - 20th June 2015
+================================
+  New dm_tree_node_set_thin_pool_read_only(DM_1_02_99) for read-only thin pool.
+  Enhance error message when thin-pool message fails.
+  Fix dmeventd logging to avoid threaded use of static variable.
+  Remove redundant dmeventd SIGALRM code.
+
+Version 1.02.98 - 12th June 2015
+================================
+  Add dm_task_get_errno() to return any unexpected errno from a dm ioctl call.
+  Use copy of errno made after each dm ioctl call in case errno changes later.
+
+Version 1.02.97 - 15th May 2015
+===============================
+  New dm_task_get_info(DM_1_02_97) supports internal_suspend state.
+  New symbols are versioned and come with versioned symbol name (DM_1_02_97).
+
+Version 1.02.96 - 2nd May 2015
+==============================
+  Fix selection to not match if using reserved value in criteria with >,<,>=,<.
+  Fix selection to not match reserved values for size fields if using >,<,>=,<.
+  Include uuid or device number in log message after ioctl failure.
+  Add DM_INTERNAL_SUSPEND_FLAG to dm-ioctl.h.
+  Install blkdeactivate script and its man page with make install_device-mapper.
+
 Version 1.02.95 - 15th March 2015
 =================================
  Makefile regenerated.
--- a/aclocal.m4
+++ b/aclocal.m4
@@ -1,6 +1,6 @@
-# generated automatically by aclocal 1.14.1 -*- Autoconf -*-
+# generated automatically by aclocal 1.15 -*- Autoconf -*-

-# Copyright (C) 1996-2013 Free Software Foundation, Inc.
+# Copyright (C) 1996-2014 Free Software Foundation, Inc.

 # This file is free software; the Free Software Foundation
 # gives unlimited permission to copy and/or distribute it,
--- a/conf/.gitignore
+++ b/conf/.gitignore
@@ -0,0 +1,4 @@
+command_profile_template.profile
+example.conf
+lvmlocal.conf
+metadata_profile_template.profile
--- a/conf/Makefile.in
+++ b/conf/Makefile.in
@@ -1,5 +1,5 @@
 #
-# Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved.
+# Copyright (C) 2004-2015 Red Hat, Inc. All rights reserved.
 #
 # This file is part of LVM2.
 #
@@ -20,12 +20,20 @@ CONFDEST=lvm.conf
 CONFLOCAL=lvmlocal.conf

 PROFILE_TEMPLATES=command_profile_template.profile metadata_profile_template.profile
-PROFILES=$(PROFILE_TEMPLATES) $(srcdir)/thin-generic.profile $(srcdir)/thin-performance.profile
+PROFILES=$(PROFILE_TEMPLATES) \
+	$(srcdir)/cache-mq.profile \
+	$(srcdir)/cache-smq.profile \
+	$(srcdir)/thin-generic.profile \
+	$(srcdir)/thin-performance.profile

 include $(top_builddir)/make.tmpl

 .PHONY: install_conf install_localconf install_profiles

+generate:
+	(cat $(top_srcdir)/conf/example.conf.base && LD_LIBRARY_PATH=$(top_builddir)/libdm:$(LD_LIBRARY_PATH) $(top_builddir)/tools/lvm dumpconfig --type default --unconfigured --withcomments --ignorelocal --withspaces) > example.conf.in
+	(cat $(top_srcdir)/conf/lvmlocal.conf.base && LD_LIBRARY_PATH=$(top_builddir)/libdm:$(LD_LIBRARY_PATH) $(top_builddir)/tools/lvm dumpconfig --type default --unconfigured --withcomments --withspaces local) > lvmlocal.conf.in
+
 install_conf: $(CONFSRC)
 	@if [ ! -e $(confdir)/$(CONFDEST) ]; then \
 		echo "$(INSTALL_WDATA) -D $< $(confdir)/$(CONFDEST)"; \
--- a/conf/cache-mq.profile
+++ b/conf/cache-mq.profile
@@ -0,0 +1,20 @@
+# Demo configuration 'mq' cache policy
+#
+# Note: This policy has been deprecated in favor of the smq policy.
+# The keyword "default" means the setting is left at the kernel default.
+#
+
+allocation {
+	cache_pool_chunk_size = 64
+	cache_mode = "writethrough"
+	cache_policy = "mq"
+	cache_settings {
+		mq {
+			sequential_threshold = "default"	#  #nr_sequential_ios
+			random_threshold = "default"		#  #nr_random_ios
+			read_promote_adjustment = "default"
+			write_promote_adjustment = "default"
+			discard_promote_adjustment = "default"
+		}
+	}
+}
--- a/conf/cache-smq.profile
+++ b/conf/cache-smq.profile
@@ -0,0 +1,14 @@
+# Demo configuration 'smq' cache policy
+#
+# The stochastic multi-queue (smq) policy addresses some of the problems
+# with the multiqueue (mq) policy and uses less memory.
+#
+
+allocation {
+	cache_pool_chunk_size = 64
+	cache_mode = "writethrough"
+	cache_policy = "smq"
+	cache_settings {
+	        # currently no settings for "smq" policy
+	}
+}
--- a/conf/example.conf.base
+++ b/conf/example.conf.base
@@ -0,0 +1,23 @@
+# This is an example configuration file for the LVM2 system.
+# It contains the default settings that would be used if there was no
+# @DEFAULT_SYS_DIR@/lvm.conf file.
+#
+# Refer to 'man lvm.conf' for further information including the file layout.
+#
+# Refer to 'man lvm.conf' for information about how settings configured in
+# this file are combined with built-in values and command line options to
+# arrive at the final values used by LVM.
+#
+# Refer to 'man lvmconfig' for information about displaying the built-in
+# and configured values used by LVM.
+#
+# If a default value is set in this file (not commented out), then a
+# new version of LVM using this file will continue using that value,
+# even if the new version of LVM changes the built-in default value.
+#
+# To put this file in a different directory and override @DEFAULT_SYS_DIR@ set
+# the environment variable LVM_SYSTEM_DIR before running the tools.
+#
+# N.B. Take care that each setting only appears once if uncommenting
+# example settings in this file.
+
--- a/conf/example.conf.in
+++ b/conf/example.conf.in
--- a/conf/lvmlocal.conf.base
+++ b/conf/lvmlocal.conf.base
@@ -0,0 +1,19 @@
+# This is a local configuration file template for the LVM2 system
+# which should be installed as @DEFAULT_SYS_DIR@/lvmlocal.conf .
+#
+# Refer to 'man lvm.conf' for information about the file layout.
+#
+# To put this file in a different directory and override
+# @DEFAULT_SYS_DIR@ set the environment variable LVM_SYSTEM_DIR before
+# running the tools.
+#
+# The lvmlocal.conf file is normally expected to contain only the
+# "local" section which contains settings that should not be shared or
+# repeated among different hosts.  (But if other sections are present,
+# they *will* get processed.  Settings in this file override equivalent
+# ones in lvm.conf and are in turn overridden by ones in any enabled
+# lvm_<tag>.conf files.)
+#
+# Please take care that each setting only appears once if uncommenting
+# example settings in this file and never copy this file between hosts.
+
--- a/conf/lvmlocal.conf.in
+++ b/conf/lvmlocal.conf.in
@@ -1,17 +1,6 @@
 # This is a local configuration file template for the LVM2 system
 # which should be installed as @DEFAULT_SYS_DIR@/lvmlocal.conf .
 #
-# This file allows you to assign a unique identity to a host running
-# LVM2 that is permitted to access storage devices visible to more than
-# one machine simultaneously.  
-#
-# You must ensure that every such host uses a different system_id
-# identifier, otherwise LVM2 cannot protect you from simultaneous
-# access from multiple hosts and possible data corruption.
-#
-# Refer to 'man lvmsystemid' for information about the correct ways
-# to use this and its limitations.
-#
 # Refer to 'man lvm.conf' for information about the file layout.
 #
 # To put this file in a different directory and override
@@ -26,29 +15,43 @@
 # lvm_<tag>.conf files.)
 #
 # Please take care that each setting only appears once if uncommenting
-# example settings in this file and never copy this file between
-# hosts to avoid accidentally assigning the same system ID to
-# more than one host!
+# example settings in this file and never copy this file between hosts.

+
+# Configuration section local.
+# LVM settings that are specific to the local host.
 local {
-    # This defines the system ID of the local host.  This is used
-    # when global/system_id_source is set to "lvmlocal" in the main
-    # configuration file, conventionally @DEFAULT_SYS_DIR@/lvm.conf.  
-    # When used, it must be set to a unique value - often a hostname -
-    # across all the hosts sharing access to the storage.
-    #
-    # By default, no system_id is set.
-    # system_id = ""
-    #
-    # Set the system_id to the string "host1".
-    # system_id = "host1"

-    # This defines a list of extra system_ids other than the local
-    # system_id that the local host is allowed to access.  These are
-    # used for all values of global/system_id_source except "none".
-    #
-    # Only use this if you have read 'man lvmsystemid' and you are sure
-    # you understand why you need to use it!
-    #
-    # extra_system_ids = []
+	# Configuration option local/system_id.
+	# Defines the local system ID for lvmlocal mode.
+	# This is used when global/system_id_source is set
+	# to 'lvmlocal' in the main configuration file,
+	# e.g. lvm.conf.
+	# When used, it must be set to a unique value
+	# among all hosts sharing access to the storage,
+	# e.g. a host name.
+	# Example:
+	# Set no system ID.
+	# system_id = ""
+	# Example:
+	# Set the system_id to the string 'host1'.
+	# system_id = "host1"
+	# system_id = ""
+
+	# Configuration option local/extra_system_ids.
+	# A list of extra VG system IDs the local host can access.
+	# VGs with the system IDs listed here (in addition
+	# to the host's own system ID) can be fully accessed
+	# by the local host.  (These are system IDs that the
+	# host sees in VGs, not system IDs that identify the
+	# local host, which is determined by system_id_source.)
+	# Use this only after consulting 'man lvmsystemid'
+	# to be certain of correct usage and possible dangers.
+	# This configuration option does not have a default value defined.
+
+	# Configuration option local/host_id.
+	# The lvmlockd sanlock host_id.
+	# This must be unique among all hosts,
+	# and must be between 1 and 2000.
+	# host_id = 0
 }
--- a/1365
+++ b/1365
--- a/configure.in
+++ b/configure.in
@@ -1,6 +1,6 @@
 ###############################################################################
 ## Copyright (C) 2000-2004 Sistina Software, Inc. All rights reserved.
-## Copyright (C) 2004-2014 Red Hat, Inc. All rights reserved.
+## Copyright (C) 2004-2015 Red Hat, Inc. All rights reserved.
 ##
 ## This copyrighted material is made available to anyone wishing to use,
 ## modify, copy, or redistribute it subject to the terms and conditions
@@ -26,10 +26,9 @@ AC_CONFIG_AUX_DIR(autoconf)
 dnl -- Get system type
 AC_CANONICAL_TARGET([])

+AS_IF([test -z "$CFLAGS"], [COPTIMISE_FLAG="-O2"])
 case "$host_os" in
 	linux*)
-		CFLAGS="$CFLAGS"
-		COPTIMISE_FLAG="-O2"
 		CLDFLAGS="$CLDFLAGS -Wl,--version-script,.export.sym"
 		ELDFLAGS="-Wl,--export-dynamic"
 		# FIXME Generate list and use --dynamic-list=.dlopen.sym
@@ -39,6 +38,10 @@ case "$host_os" in
 		LIB_SUFFIX=so
 		DEVMAPPER=yes
 		LVMETAD=no
+		LVMPOLLD=no
+		LVMLOCKD=no
+		LOCKDSANLOCK=no
+		LOCKDDLM=no
 		ODIRECT=yes
 		DM_IOCTLS=yes
 		SELINUX=yes
@@ -48,7 +51,6 @@ case "$host_os" in
 		;;
 	darwin*)
 		CFLAGS="$CFLAGS -no-cpp-precomp -fno-common"
-		COPTIMISE_FLAG="-O2"
 		CLDFLAGS="$CLDFLAGS"
 		ELDFLAGS=
 		CLDWHOLEARCHIVE="-all_load"
@@ -68,8 +70,12 @@ esac
 dnl -- Checks for programs.
 AC_PROG_SED
 AC_PROG_AWK
+save_CFLAGS=$CFLAGS
+save_CXXFLAGS=$CXXFLAGS
 AC_PROG_CC
 AC_PROG_CXX
+CFLAGS=$save_CFLAGS
+CXXFLAGS=$save_CXXFLAGS

 dnl probably no longer needed in 2008, but...
 AC_PROG_GCC_TRADITIONAL
@@ -85,14 +91,19 @@ AC_PATH_TOOL(CSCOPE_CMD, cscope)
 dnl -- Check for header files.
 AC_HEADER_DIRENT
 AC_HEADER_MAJOR
+AC_HEADER_STDBOOL
 AC_HEADER_STDC
 AC_HEADER_SYS_WAIT
 AC_HEADER_TIME

-AC_CHECK_HEADERS([locale.h stddef.h syslog.h sys/file.h sys/time.h assert.h \
-  langinfo.h libgen.h signal.h sys/mman.h sys/resource.h sys/utsname.h \
-  sys/wait.h time.h], ,
-  [AC_MSG_ERROR(bailing out)])
+AC_CHECK_HEADERS([assert.h ctype.h dirent.h errno.h fcntl.h float.h \
+  getopt.h inttypes.h langinfo.h libgen.h limits.h locale.h paths.h \
+  signal.h stdarg.h stddef.h stdio.h stdlib.h string.h sys/file.h \
+  sys/ioctl.h syslog.h sys/mman.h sys/param.h sys/resource.h sys/stat.h \
+  sys/time.h sys/types.h sys/utsname.h sys/wait.h time.h \
+  unistd.h], , [AC_MSG_ERROR(bailing out)])
+
+AC_CHECK_HEADERS(termios.h sys/statvfs.h sys/timerfd.h)

 case "$host_os" in
 	linux*)
@@ -101,16 +112,13 @@ case "$host_os" in
 		AC_CHECK_HEADERS(machine/endian.h sys/disk.h,,AC_MSG_ERROR(bailing out)) ;;
 esac

-AC_CHECK_HEADERS([ctype.h dirent.h errno.h fcntl.h getopt.h inttypes.h limits.h \
-  stdarg.h stdio.h stdlib.h string.h sys/ioctl.h sys/param.h sys/stat.h \
-  sys/types.h unistd.h], , [AC_MSG_ERROR(bailing out)])
-AC_CHECK_HEADERS(termios.h sys/statvfs.h)
-
 ################################################################################
 dnl -- Check for typedefs, structures, and compiler characteristics.
 AC_C_CONST
 AC_C_INLINE
 AC_CHECK_MEMBERS([struct stat.st_rdev])
+AC_CHECK_TYPES([ptrdiff_t])
+AC_STRUCT_TM
 AC_TYPE_OFF_T
 AC_TYPE_PID_T
 AC_TYPE_SIGNAL
@@ -126,15 +134,13 @@ AC_TYPE_UINT8_T
 AC_TYPE_UINT16_T
 AC_TYPE_UINT32_T
 AC_TYPE_UINT64_T
-AC_CHECK_MEMBERS([struct stat.st_rdev])
-AC_STRUCT_TM

 ################################################################################
 dnl -- Check for functions
-AC_CHECK_FUNCS([ftruncate gethostname getpagesize \
-  gettimeofday memset mkdir mkfifo rmdir munmap nl_langinfo setenv setlocale \
-  strcasecmp strchr strcspn strspn strdup strncasecmp strerror strrchr \
-  strstr strtol strtoul uname], , [AC_MSG_ERROR(bailing out)])
+AC_CHECK_FUNCS([ftruncate gethostname getpagesize gettimeofday localtime_r \
+  memchr memset mkdir mkfifo munmap nl_langinfo realpath rmdir setenv \
+  setlocale strcasecmp strchr strcspn strdup strerror strncasecmp strndup \
+  strrchr strspn strstr strtol strtoul uname], , [AC_MSG_ERROR(bailing out)])
 AC_FUNC_ALLOCA
 AC_FUNC_CLOSEDIR_VOID
 AC_FUNC_CHOWN
@@ -142,6 +148,7 @@ AC_FUNC_FORK
 AC_FUNC_LSTAT
 AC_FUNC_MALLOC
 AC_FUNC_MEMCMP
+AC_FUNC_MKTIME
 AC_FUNC_MMAP
 AC_FUNC_REALLOC
 AC_FUNC_STAT
@@ -168,6 +175,9 @@ AC_SUBST(HAVE_FULL_RELRO)
 ################################################################################
 dnl -- Prefix is /usr by default, the exec_prefix default is setup later
 AC_PREFIX_DEFAULT(/usr)
+if test "$prefix" = NONE; then
+  datarootdir=${ac_default_prefix}/share
+fi

 ################################################################################
 dnl -- Setup the ownership of the files
@@ -198,6 +208,7 @@ AC_ARG_WITH(device-uid,
 			   [set the owner used for new device nodes [UID=0]]),
 	    DM_DEVICE_UID=$withval, DM_DEVICE_UID=0)
 AC_MSG_RESULT($DM_DEVICE_UID)
+AC_DEFINE_UNQUOTED([DM_DEVICE_UID], [$DM_DEVICE_UID], [Define default owner for device node])

 ################################################################################
 dnl -- Setup device group ownership
@@ -208,6 +219,7 @@ AC_ARG_WITH(device-gid,
 			   [set the group used for new device nodes [GID=0]]),
 	    DM_DEVICE_GID=$withval, DM_DEVICE_GID=0)
 AC_MSG_RESULT($DM_DEVICE_GID)
+AC_DEFINE_UNQUOTED([DM_DEVICE_GID], [$DM_DEVICE_GID], [Define default group for device node])

 ################################################################################
 dnl -- Setup device mode
@@ -218,6 +230,7 @@ AC_ARG_WITH(device-mode,
 			   [set the mode used for new device nodes [MODE=0600]]),
 	    DM_DEVICE_MODE=$withval, DM_DEVICE_MODE=0600)
 AC_MSG_RESULT($DM_DEVICE_MODE)
+AC_DEFINE_UNQUOTED([DM_DEVICE_MODE], [$DM_DEVICE_MODE], [Define default mode for device node])

 AC_MSG_CHECKING(when to create device nodes)
 AC_ARG_WITH(device-nodes-on,
@@ -257,8 +270,13 @@ AC_ARG_ENABLE(lvm1_fallback,
 AC_MSG_RESULT($LVM1_FALLBACK)

 if test "$LVM1_FALLBACK" = yes; then
+	DEFAULT_FALLBACK_TO_LVM1=1
 	AC_DEFINE([LVM1_FALLBACK], 1, [Define to 1 if 'lvm' should fall back to using LVM1 binaries if device-mapper is missing from the kernel])
+else
+	DEFAULT_FALLBACK_TO_LVM1=0
 fi
+AC_DEFINE_UNQUOTED(DEFAULT_FALLBACK_TO_LVM1, [$DEFAULT_FALLBACK_TO_LVM1],
+		   [Fall back to LVM1 by default if device-mapper is missing from the kernel.])

 ################################################################################
 dnl -- format1 inclusion type
@@ -546,6 +564,12 @@ case "$CACHE" in
 *) AC_MSG_ERROR([--with-cache parameter invalid]) ;;
 esac

+dnl -- cache_check needs-check flag
+AC_ARG_ENABLE(cache_check_needs_check,
+	      AC_HELP_STRING([--disable-cache_check_needs_check],
+			     [required if cache_check version is < 0.5]),
+	      CACHE_CHECK_NEEDS_CHECK=$enableval, CACHE_CHECK_NEEDS_CHECK=yes)
+
 # Test if necessary cache tools are available
 # if not - use plain defaults and warn user
 case "$CACHE" in
@@ -559,6 +583,28 @@ case "$CACHE" in
 			CACHE_CONFIGURE_WARN=y
 		fi
 	fi
+	if test "$CACHE_CHECK_NEEDS_CHECK" = yes; then
+		$CACHE_CHECK_CMD -V 2>/dev/null >conftest.tmp
+		read -r CACHE_CHECK_VSN < conftest.tmp
+		IFS=. read -r CACHE_CHECK_VSN_MAJOR CACHE_CHECK_VSN_MINOR CACHE_CHECK_VSN_PATCH < conftest.tmp
+		rm -f conftest.tmp
+
+		# Require version >= 0.5.4 for --clear-needs-check-flag
+		if test -z "$CACHE_CHECK_VSN_MAJOR" \
+			|| test -z "$CACHE_CHECK_VSN_MINOR" \
+			|| test -z "$CACHE_CHECK_VSN_PATCH"; then
+			AC_MSG_WARN([$CACHE_CHECK_CMD: Bad version "$CACHE_CHECK_VSN" found])
+			CACHE_CHECK_VERSION_WARN=y
+			CACHE_CHECK_NEEDS_CHECK=no
+		elif test "$CACHE_CHECK_VSN_MAJOR" -eq 0 ; then
+			if test "$CACHE_CHECK_VSN_MINOR" -lt 5 \
+				|| test "$CACHE_CHECK_VSN_MINOR" -eq 5 -a "$CACHE_CHECK_VSN_PATCH" -lt 4; then
+				AC_MSG_WARN([$CACHE_CHECK_CMD: Old version "$CACHE_CHECK_VSN" found])
+				CACHE_CHECK_VERSION_WARN=y
+				CACHE_CHECK_NEEDS_CHECK=no
+			fi
+		fi
+	fi
 	# Empty means a config way to ignore cache dumping
 	if test "$CACHE_DUMP_CMD" = "autodetect"; then
 		AC_PATH_TOOL(CACHE_DUMP_CMD, cache_dump)
@@ -586,6 +632,12 @@ case "$CACHE" in
 			CACHE_CONFIGURE_WARN=y
 		}
 	fi
+
+	AC_MSG_CHECKING([whether cache_check supports the needs-check flag])
+	AC_MSG_RESULT([$CACHE_CHECK_NEEDS_CHECK])
+	if test "$CACHE_CHECK_NEEDS_CHECK" = yes; then
+		AC_DEFINE([CACHE_CHECK_NEEDS_CHECK], 1, [Define to 1 if the external 'cache_check' tool requires the --clear-needs-check-flag option])
+	fi
 	;;
 esac

@@ -614,8 +666,8 @@ AC_MSG_RESULT($READLINE)
 dnl -- Disable realtime clock support
 AC_MSG_CHECKING(whether to enable realtime support)
 AC_ARG_ENABLE(realtime,
-	      AC_HELP_STRING([--enable-realtime], [enable realtime clock support]),
-	      REALTIME=$enableval)
+	      AC_HELP_STRING([--disable-realtime], [disable realtime clock support]),
+	      REALTIME=$enableval, REALTIME=yes)
 AC_MSG_RESULT($REALTIME)

 ################################################################################
@@ -645,28 +697,32 @@ pkg_config_init() {
 }

 ################################################################################
+AC_MSG_CHECKING(for default run directory)
+RUN_DIR="/run"
+test -d "/run" || RUN_DIR="/var/run"
+AC_MSG_RESULT($RUN_DIR)
 dnl -- Set up pidfile and run directory
 AH_TEMPLATE(DEFAULT_PID_DIR)
 AC_ARG_WITH(default-pid-dir,
 	    AC_HELP_STRING([--with-default-pid-dir=PID_DIR],
-			   [Default directory to keep PID files in. [/var/run]]),
-	    DEFAULT_PID_DIR="$withval", DEFAULT_PID_DIR="/var/run")
+			   [Default directory to keep PID files in. [autodetect]]),
+	    DEFAULT_PID_DIR="$withval", DEFAULT_PID_DIR=$RUN_DIR)
 AC_DEFINE_UNQUOTED(DEFAULT_PID_DIR, ["$DEFAULT_PID_DIR"],
 		   [Default directory to keep PID files in.])

 AH_TEMPLATE(DEFAULT_DM_RUN_DIR, [Name of default DM run directory.])
 AC_ARG_WITH(default-dm-run-dir,
 	    AC_HELP_STRING([--with-default-dm-run-dir=DM_RUN_DIR],
-			   [ Default DM run directory. [/var/run]]),
-	    DEFAULT_DM_RUN_DIR="$withval", DEFAULT_DM_RUN_DIR="/var/run")
+			   [ Default DM run directory. [autodetect]]),
+	    DEFAULT_DM_RUN_DIR="$withval", DEFAULT_DM_RUN_DIR=$RUN_DIR)
 AC_DEFINE_UNQUOTED(DEFAULT_DM_RUN_DIR, ["$DEFAULT_DM_RUN_DIR"],
 		   [Default DM run directory.])

 AH_TEMPLATE(DEFAULT_RUN_DIR, [Name of default LVM run directory.])
 AC_ARG_WITH(default-run-dir,
 	    AC_HELP_STRING([--with-default-run-dir=RUN_DIR],
-			   [Default LVM run directory. [/var/run/lvm]]),
-	    DEFAULT_RUN_DIR="$withval", DEFAULT_RUN_DIR="/var/run/lvm")
+			   [Default LVM run directory. [autodetect_run_dir/lvm]]),
+	    DEFAULT_RUN_DIR="$withval", DEFAULT_RUN_DIR="$RUN_DIR/lvm")
 AC_DEFINE_UNQUOTED(DEFAULT_RUN_DIR, ["$DEFAULT_RUN_DIR"],
 		   [Default LVM run directory.])

@@ -1016,6 +1072,13 @@ if test "$TESTING" = yes; then
 	PKG_CHECK_MODULES(CUNIT, cunit >= 2.0)
 fi

+################################################################################
+dnl -- Set LVM2 testsuite data
+TESTSUITE_DATA='${datarootdir}/lvm2-testsuite'
+# double eval needed ${datarootdir} -> ${prefix}/share -> real path
+AC_DEFINE_UNQUOTED(TESTSUITE_DATA, ["$(eval echo $(eval echo $TESTSUITE_DATA))"], [Path to testsuite data])
+
+
 ################################################################################
 dnl -- Enable valgrind awareness of memory pools
 AC_MSG_CHECKING(whether to enable valgrind awareness of pools)
@@ -1061,7 +1124,106 @@ AC_MSG_RESULT($LVMETAD)

 BUILD_LVMETAD=$LVMETAD

+################################################################################
+dnl -- Build lvmpolld
+AC_MSG_CHECKING(whether to build lvmpolld)
+AC_ARG_ENABLE(lvmpolld,
+	      AC_HELP_STRING([--enable-lvmpolld],
+			     [enable the LVM Polling Daemon]),
+	      LVMPOLLD=$enableval)
+AC_MSG_RESULT($LVMPOLLD)
+
+BUILD_LVMPOLLD=$LVMPOLLD
+
+################################################################################
+dnl -- Build lockdsanlock
+AC_MSG_CHECKING(whether to build lockdsanlock)
+AC_ARG_ENABLE(lockd-sanlock,
+	      AC_HELP_STRING([--enable-lockd-sanlock],
+			     [enable the LVM lock daemon using sanlock]),
+	      LOCKDSANLOCK=$enableval)
+AC_MSG_RESULT($LOCKDSANLOCK)
+
+BUILD_LOCKDSANLOCK=$LOCKDSANLOCK
+
+if test "$BUILD_LOCKDSANLOCK" = yes; then
+	AC_DEFINE([LOCKDSANLOCK_SUPPORT], 1, [Define to 1 to include code that uses lvmlockd sanlock option.])
+fi
+
+################################################################################
+dnl -- Look for sanlock libraries
+if test "$BUILD_LOCKDSANLOCK" = yes; then
+	PKG_CHECK_MODULES(LOCKD_SANLOCK, libsanlock_client, [HAVE_LOCKD_SANLOCK=yes], $bailout)
+	BUILD_LVMLOCKD=yes
+fi
+
+################################################################################
+dnl -- Build lockddlm
+AC_MSG_CHECKING(whether to build lockddlm)
+AC_ARG_ENABLE(lockd-dlm,
+	      AC_HELP_STRING([--enable-lockd-dlm],
+			     [enable the LVM lock daemon using dlm]),
+	      LOCKDDLM=$enableval)
+AC_MSG_RESULT($LOCKDDLM)
+
+BUILD_LOCKDDLM=$LOCKDDLM
+
+if test "$BUILD_LOCKDDLM" = yes; then
+	AC_DEFINE([LOCKDDLM_SUPPORT], 1, [Define to 1 to include code that uses lvmlockd dlm option.])
+fi
+
+################################################################################
+dnl -- Look for dlm libraries
+if test "$BUILD_LOCKDDLM" = yes; then
+	PKG_CHECK_MODULES(LOCKD_DLM, libdlm, [HAVE_LOCKD_DLM=yes], $bailout)
+	BUILD_LVMLOCKD=yes
+fi
+
+################################################################################
+dnl -- Build lvmlockd
+
+AC_MSG_CHECKING(whether to build lvmlockd)
+AC_MSG_RESULT($BUILD_LVMLOCKD)
+
+if test "$BUILD_LVMLOCKD" = yes; then
+	AS_IF([test -n "$BUILD_LVMPOLLD"], [BUILD_LVMPOLLD=yes; AC_MSG_WARN([Enabling lvmpolld - required by lvmlockd.])])
+	AS_IF([test -n "$BUILD_LVMETAD"], [BUILD_LVMETAD=yes; AC_MSG_WARN([Enabling lvmetad - required by lvmlockd.])])
+	AC_MSG_CHECKING([defaults for use_lvmlockd])
+	AC_ARG_ENABLE(use_lvmlockd,
+		      AC_HELP_STRING([--disable-use-lvmlockd],
+				     [disable usage of LVM lock daemon]),
+		      [case ${enableval} in
+		       yes) DEFAULT_USE_LVMLOCKD=1 ;;
+		       *) DEFAULT_USE_LVMLOCKD=0 ;;
+		       esac], DEFAULT_USE_LVMLOCKD=1)
+	AC_MSG_RESULT($DEFAULT_USE_LVMLOCKD)
+	AC_DEFINE([LVMLOCKD_SUPPORT], 1, [Define to 1 to include code that uses lvmlockd.])
+
+	AC_ARG_WITH(lvmlockd-pidfile,
+		    AC_HELP_STRING([--with-lvmlockd-pidfile=PATH],
+				   [lvmlockd pidfile [PID_DIR/lvmlockd.pid]]),
+		    LVMLOCKD_PIDFILE=$withval,
+		    LVMLOCKD_PIDFILE="$DEFAULT_PID_DIR/lvmlockd.pid")
+	AC_DEFINE_UNQUOTED(LVMLOCKD_PIDFILE, ["$LVMLOCKD_PIDFILE"],
+			   [Path to lvmlockd pidfile.])
+else
+	DEFAULT_USE_LVMLOCKD=0
+fi
+AC_DEFINE_UNQUOTED(DEFAULT_USE_LVMLOCKD, [$DEFAULT_USE_LVMLOCKD],
+		   [Use lvmlockd by default.])
+
+################################################################################
+dnl -- Check lvmetad
 if test "$BUILD_LVMETAD" = yes; then
+	AC_MSG_CHECKING([defaults for use_lvmetad])
+	AC_ARG_ENABLE(use_lvmetad,
+		      AC_HELP_STRING([--disable-use-lvmetad],
+				     [disable usage of LVM Metadata Daemon]),
+		      [case ${enableval} in
+		       yes) DEFAULT_USE_LVMETAD=1 ;;
+		       *) DEFAULT_USE_LVMETAD=0 ;;
+		       esac], DEFAULT_USE_LVMETAD=1)
+	AC_MSG_RESULT($DEFAULT_USE_LVMETAD)
 	AC_DEFINE([LVMETAD_SUPPORT], 1, [Define to 1 to include code that uses lvmetad.])

 	AC_ARG_WITH(lvmetad-pidfile,
@@ -1071,9 +1233,41 @@ if test "$BUILD_LVMETAD" = yes; then
 		    LVMETAD_PIDFILE="$DEFAULT_PID_DIR/lvmetad.pid")
 	AC_DEFINE_UNQUOTED(LVMETAD_PIDFILE, ["$LVMETAD_PIDFILE"],
 			   [Path to lvmetad pidfile.])
+else
+	DEFAULT_USE_LVMETAD=0
 fi
+AC_DEFINE_UNQUOTED(DEFAULT_USE_LVMETAD, [$DEFAULT_USE_LVMETAD],
+		   [Use lvmetad by default.])

 ################################################################################
+dnl -- Check lvmpolld
+if test "$BUILD_LVMPOLLD" = yes; then
+	AC_MSG_CHECKING([defaults for use_lvmpolld])
+	AC_ARG_ENABLE(use_lvmpolld,
+		      AC_HELP_STRING([--disable-use-lvmpolld],
+				     [disable usage of LVM Poll Daemon]),
+		      [case ${enableval} in
+		       yes) DEFAULT_USE_LVMPOLLD=1 ;;
+		       *) DEFAULT_USE_LVMPOLLD=0 ;;
+		       esac], DEFAULT_USE_LVMPOLLD=1)
+	AC_MSG_RESULT($DEFAULT_USE_LVMPOLLD)
+	AC_DEFINE([LVMPOLLD_SUPPORT], 1, [Define to 1 to include code that uses lvmpolld.])
+
+	AC_ARG_WITH(lvmpolld-pidfile,
+		    AC_HELP_STRING([--with-lvmpolld-pidfile=PATH],
+				   [lvmpolld pidfile [PID_DIR/lvmpolld.pid]]),
+		    LVMPOLLD_PIDFILE=$withval,
+		    LVMPOLLD_PIDFILE="$DEFAULT_PID_DIR/lvmpolld.pid")
+	AC_DEFINE_UNQUOTED(LVMPOLLD_PIDFILE, ["$LVMPOLLD_PIDFILE"],
+			   [Path to lvmpolld pidfile.])
+else
+	DEFAULT_USE_LVMPOLLD=0
+fi
+AC_DEFINE_UNQUOTED(DEFAULT_USE_LVMPOLLD, [$DEFAULT_USE_LVMPOLLD],
+		   [Use lvmpolld by default.])
+
+################################################################################
+
 dnl -- Enable blkid wiping functionality
 AC_MSG_CHECKING(whether to enable libblkid detection of signatures when wiping)
 AC_ARG_ENABLE(blkid_wiping,
@@ -1093,9 +1287,16 @@ if test "$BLKID_WIPING" != no; then
 			   fi])
 	if test "$BLKID_WIPING" = yes; then
 		BLKID_PC="blkid"
+		DEFAULT_USE_BLKID_WIPING=1
 		AC_DEFINE([BLKID_WIPING_SUPPORT], 1, [Define to 1 to use libblkid detection of signatures when wiping.])
+	else
+		DEFAULT_USE_BLKID_WIPING=0 # BLKID_WIPING_SUPPORT is not defined on this path
 	fi
+else
+	DEFAULT_USE_BLKID_WIPING=0
 fi
+AC_DEFINE_UNQUOTED(DEFAULT_USE_BLKID_WIPING, [$DEFAULT_USE_BLKID_WIPING],
+		   [Use blkid wiping by default.])

 ################################################################################
 dnl -- Enable udev-systemd protocol to instantiate a service for background jobs
@@ -1168,11 +1369,11 @@ AC_ARG_ENABLE(compat,
 			     [enable support for old device-mapper versions]),
 	      DM_COMPAT=$enableval, DM_COMPAT=no)

-if test "$DM_COMPAT" = yes; then
-	AC_MSG_ERROR([--enable-compat is not currently supported.
+AS_IF([test "$DM_COMPAT" = yes],
+      [AC_DEFINE([DM_COMPAT], 1, [Define to enable compat protocol])
+      AC_MSG_ERROR([--enable-compat is not currently supported.
 Since device-mapper version 1.02.66, only one version (4) of the device-mapper
-ioctl protocol is supported.])
-fi
+ioctl protocol is supported.])])

 ################################################################################
 dnl -- Compatible units suffix mode
@@ -1192,6 +1393,8 @@ AC_ARG_ENABLE(ioctl,
 	      AC_HELP_STRING([--disable-ioctl],
 			     [disable ioctl calls to device-mapper in the kernel]),
 	      DM_IOCTLS=$enableval)
+AS_IF([test "$DM_IOCTLS" = yes],
+      [AC_DEFINE([DM_IOCTLS], 1, [Define to enable ioctls calls to kernel])])

 ################################################################################
 dnl -- Disable O_DIRECT
@@ -1374,9 +1577,11 @@ if test "$REALTIME" = yes; then
 	if test "$HAVE_REALTIME" = yes; then
 		AC_DEFINE([HAVE_REALTIME], 1, [Define to 1 to include support for realtime clock.])
 		LIBS="-lrt $LIBS"
+		RT_PC="librt"
 	else
 		AC_MSG_WARN(Disabling realtime clock)
 	fi
+	AC_MSG_RESULT($HAVE_REALTIME)
 fi

 dnl Check if the system has struct stat st_ctim.
@@ -1450,14 +1655,16 @@ if test "$INTL" = yes; then
 # FIXME - Move this - can be device-mapper too
 	INTL_PACKAGE="lvm2"
 	AC_PATH_TOOL(MSGFMT, msgfmt)
-	if [[ -z "$MSGFMT" ]]; then
-		AC_MSG_ERROR([msgfmt not found in path $PATH])
-	fi
+
+	AS_IF([test -z "$MSGFMT"], [AC_MSG_ERROR([msgfmt not found in path $PATH])])

 	AC_ARG_WITH(localedir,
 		    AC_HELP_STRING([--with-localedir=DIR],
-				   [translation files in DIR [PREFIX/share/locale]]),
-		    LOCALEDIR=$withval, LOCALEDIR='${prefix}/share/locale')
+				   [locale-dependent data [DATAROOTDIR/locale]]),
+		    localedir=$withval, localedir=${localedir-'${datarootdir}/locale'})
+	AC_DEFINE_UNQUOTED([INTL_PACKAGE], ["$INTL_PACKAGE"], [Internalization package])
+	# double eval needed ${datarootdir} -> ${prefix}/share -> real path
+	AC_DEFINE_UNQUOTED([LOCALEDIR], ["$(eval echo $(eval echo $localedir))"], [Locale-dependent data])
 fi

 ################################################################################
@@ -1522,6 +1729,19 @@ if test "$READLINE" = yes; then
 	AC_CHECK_HEADERS(readline/readline.h readline/history.h,,hard_bailout)
 fi

+if test "$BUILD_CMIRRORD" = yes; then
+	AC_CHECK_FUNCS(atexit,,hard_bailout)
+fi
+
+if test "$BUILD_LVMLOCKD" = yes; then
+	AC_CHECK_FUNCS(clock_gettime strtoull,,hard_bailout)
+fi
+
+if test "$BUILD_LVMPOLLD" = yes; then
+	AC_CHECK_FUNCS(strpbrk,,hard_bailout)
+	AC_FUNC_STRERROR_R
+fi
+
 if test "$CLVMD" != none; then
 	AC_CHECK_HEADERS(mntent.h netdb.h netinet/in.h pthread.h search.h sys/mount.h sys/socket.h sys/uio.h sys/un.h utmpx.h,,AC_MSG_ERROR(bailing out))
 	AC_CHECK_FUNCS(dup2 getmntent memmove select socket,,hard_bailout)
@@ -1679,10 +1899,13 @@ LVM_LIBAPI=`echo "$VER" | $AWK -F '[[()]]' '{print $2}'`
 AC_SUBST(APPLIB)
 AC_SUBST(AWK)
 AC_SUBST(BLKID_PC)
-AC_SUBST(BLKID_WIPING)
 AC_SUBST(BUILD_CMIRRORD)
 AC_SUBST(BUILD_DMEVENTD)
 AC_SUBST(BUILD_LVMETAD)
+AC_SUBST(BUILD_LVMPOLLD)
+AC_SUBST(BUILD_LVMLOCKD)
+AC_SUBST(BUILD_LOCKDSANLOCK)
+AC_SUBST(BUILD_LOCKDDLM)
 AC_SUBST(CACHE)
 AC_SUBST(CFLAGS)
 AC_SUBST(CFLOW_CMD)
@@ -1712,6 +1935,7 @@ AC_SUBST(DEFAULT_CACHE_SUBDIR)
 AC_SUBST(DEFAULT_DATA_ALIGNMENT)
 AC_SUBST(DEFAULT_DM_RUN_DIR)
 AC_SUBST(DEFAULT_LOCK_DIR)
+AC_SUBST(DEFAULT_FALLBACK_TO_LVM1)
 AC_SUBST(DEFAULT_MIRROR_SEGTYPE)
 AC_SUBST(DEFAULT_PID_DIR)
 AC_SUBST(DEFAULT_PROFILE_SUBDIR)
@@ -1719,17 +1943,16 @@ AC_SUBST(DEFAULT_RAID10_SEGTYPE)
 AC_SUBST(DEFAULT_RUN_DIR)
 AC_SUBST(DEFAULT_SPARSE_SEGTYPE)
 AC_SUBST(DEFAULT_SYS_DIR)
+AC_SUBST(DEFAULT_USE_BLKID_WIPING)
+AC_SUBST(DEFAULT_USE_LVMETAD)
+AC_SUBST(DEFAULT_USE_LVMPOLLD)
+AC_SUBST(DEFAULT_USE_LVMLOCKD)
 AC_SUBST(DEVMAPPER)
 AC_SUBST(DLM_CFLAGS)
 AC_SUBST(DLM_LIBS)
 AC_SUBST(DL_LIBS)
 AC_SUBST(DMEVENTD)
 AC_SUBST(DMEVENTD_PATH)
-AC_SUBST(DM_COMPAT)
-AC_SUBST(DM_DEVICE_GID)
-AC_SUBST(DM_DEVICE_MODE)
-AC_SUBST(DM_DEVICE_UID)
-AC_SUBST(DM_IOCTLS)
 AC_SUBST(DM_LIB_VERSION)
 AC_SUBST(DM_LIB_PATCHLEVEL)
 AC_SUBST(ELDFLAGS)
@@ -1739,12 +1962,10 @@ AC_SUBST(HAVE_LIBDL)
 AC_SUBST(HAVE_REALTIME)
 AC_SUBST(HAVE_VALGRIND)
 AC_SUBST(INTL)
-AC_SUBST(INTL_PACKAGE)
 AC_SUBST(JOBS)
 AC_SUBST(LDDEPS)
 AC_SUBST(LIBS)
 AC_SUBST(LIB_SUFFIX)
-AC_SUBST(LOCALEDIR)
 AC_SUBST(LVM1)
 AC_SUBST(LVM1_FALLBACK)
 AC_SUBST(LVM_VERSION)
@@ -1755,6 +1976,7 @@ AC_SUBST(LVM_PATCHLEVEL)
 AC_SUBST(LVM_PATH)
 AC_SUBST(LVM_RELEASE)
 AC_SUBST(LVM_RELEASE_DATE)
+AC_SUBST(localedir)
 AC_SUBST(MANGLING)
 AC_SUBST(MIRRORS)
 AC_SUBST(MSGFMT)
@@ -1770,6 +1992,7 @@ AC_SUBST(PYTHON_LIBDIRS)
 AC_SUBST(QUORUM_CFLAGS)
 AC_SUBST(QUORUM_LIBS)
 AC_SUBST(RAID)
+AC_SUBST(RT_PC)
 AC_SUBST(READLINE_LIBS)
 AC_SUBST(REPLICATORS)
 AC_SUBST(SACKPT_CFLAGS)
@@ -1782,6 +2005,7 @@ AC_SUBST(SNAPSHOTS)
 AC_SUBST(STATICDIR)
 AC_SUBST(STATIC_LINK)
 AC_SUBST(TESTING)
+AC_SUBST(TESTSUITE_DATA)
 AC_SUBST(THIN)
 AC_SUBST(THIN_CHECK_CMD)
 AC_SUBST(THIN_DUMP_CMD)
@@ -1801,6 +2025,8 @@ AC_SUBST(VALGRIND_POOL)
 AC_SUBST(WRITE_INSTALL)
 AC_SUBST(DMEVENTD_PIDFILE)
 AC_SUBST(LVMETAD_PIDFILE)
+AC_SUBST(LVMPOLLD_PIDFILE)
+AC_SUBST(LVMLOCKD_PIDFILE)
 AC_SUBST(CLVMD_PIDFILE)
 AC_SUBST(CMIRRORD_PIDFILE)
 AC_SUBST(interface)
@@ -1834,6 +2060,8 @@ daemons/dmeventd/plugins/mirror/Makefile
 daemons/dmeventd/plugins/snapshot/Makefile
 daemons/dmeventd/plugins/thin/Makefile
 daemons/lvmetad/Makefile
+daemons/lvmpolld/Makefile
+daemons/lvmlockd/Makefile
 conf/Makefile
 conf/example.conf
 conf/lvmlocal.conf
@@ -1877,6 +2105,11 @@ scripts/lvm2_cmirrord_systemd_red_hat.service
 scripts/lvm2_lvmetad_init_red_hat
 scripts/lvm2_lvmetad_systemd_red_hat.service
 scripts/lvm2_lvmetad_systemd_red_hat.socket
+scripts/lvm2_lvmpolld_init_red_hat
+scripts/lvm2_lvmpolld_systemd_red_hat.service
+scripts/lvm2_lvmpolld_systemd_red_hat.socket
+scripts/lvm2_lvmlockd_systemd_red_hat.service
+scripts/lvm2_lvmlocking_systemd_red_hat.service
 scripts/lvm2_monitoring_init_red_hat
 scripts/lvm2_monitoring_systemd_red_hat.service
 scripts/lvm2_pvscan_systemd_red_hat@.service
@@ -1893,10 +2126,14 @@ unit-tests/mm/Makefile
 ])
 AC_OUTPUT

-test -n "$THIN_CONFIGURE_WARN" && AC_MSG_WARN([Support for thin provisioning is limited since some thin provisioning tools are missing!])
+AS_IF([test -n "$THIN_CONFIGURE_WARN"],
+      [AC_MSG_WARN([Support for thin provisioning is limited since some thin provisioning tools are missing!])])

-test -n "$THIN_CHECK_VERSION_WARN" && AC_MSG_WARN([You should also install thin_check vsn 0.3.2 (or later) to use lvm2 thin provisioning])
+AS_IF([test -n "$THIN_CHECK_VERSION_WARN"],
+      [AC_MSG_WARN([You should also install thin_check vsn 0.3.2 (or later) to use lvm2 thin provisioning])])

-test -n "$CACHE_CONFIGURE_WARN" && AC_MSG_WARN([Support for cache is limited since some cache tools are missing!])
+AS_IF([test -n "$CACHE_CONFIGURE_WARN"],
+      [AC_MSG_WARN([Support for cache is limited since some cache tools are missing!])])

-test "$ODIRECT" = yes || AC_MSG_WARN([O_DIRECT disabled: low-memory pvmove may lock up])
+AS_IF([test "$ODIRECT" != yes],
+      [AC_MSG_WARN([O_DIRECT disabled: low-memory pvmove may lock up])])
--- a/daemons/Makefile.in
+++ b/daemons/Makefile.in
@@ -1,5 +1,5 @@
 #
-# Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
+# Copyright (C) 2004-2015 Red Hat, Inc. All rights reserved.
 #
 # This file is part of LVM2.
 #
@@ -15,7 +15,7 @@ srcdir = @srcdir@
 top_srcdir = @top_srcdir@
 top_builddir = @top_builddir@

-.PHONY: dmeventd clvmd cmirrord lvmetad
+.PHONY: dmeventd clvmd cmirrord lvmetad lvmpolld lvmlockd

 ifneq ("@CLVMD@", "none")
  SUBDIRS += clvmd
@@ -36,8 +36,16 @@ ifeq ("@BUILD_LVMETAD@", "yes")
  SUBDIRS += lvmetad
 endif

+ifeq ("@BUILD_LVMPOLLD@", "yes")
+  SUBDIRS += lvmpolld
+endif
+
+ifeq ("@BUILD_LVMLOCKD@", "yes")
+  SUBDIRS += lvmlockd 
+endif
+
 ifeq ($(MAKECMDGOALS),distclean)
-  SUBDIRS = clvmd cmirrord dmeventd lvmetad
+  SUBDIRS = clvmd cmirrord dmeventd lvmetad lvmpolld lvmlockd
 endif

 include $(top_builddir)/make.tmpl
--- a/daemons/clvmd/.gitignore
+++ b/daemons/clvmd/.gitignore
@@ -0,0 +1 @@
+clvmd
--- a/daemons/clvmd/Makefile.in
+++ b/daemons/clvmd/Makefile.in
@@ -36,10 +36,6 @@ SOURCES = \
 	lvm-functions.c  \
 	refresh_clvmd.c

-ifeq ("@DEBUG@", "yes")
-	DEFS += -DDEBUG
-endif
-
 ifneq (,$(findstring cman,, "@CLVMD@,"))
 	SOURCES += clvmd-cman.c
 	LMLIBS += $(CMAN_LIBS) $(CONFDB_LIBS) $(DLM_LIBS)
--- a/daemons/clvmd/clvmd-command.c
+++ b/daemons/clvmd/clvmd-command.c
@@ -323,6 +323,7 @@ void cmd_client_cleanup(struct local_client *client)
 	int lkid;
 	char *lockname;

+	DEBUGLOG("Client thread cleanup (%p)\n", client);
 	if (!client->bits.localsock.private)
 		return;

@@ -331,7 +332,7 @@ void cmd_client_cleanup(struct local_client *client)
 	dm_hash_iterate(v, lock_hash) {
 		lkid = (int)(long)dm_hash_get_data(lock_hash, v);
 		lockname = dm_hash_get_key(lock_hash, v);
-		DEBUGLOG("cleanup: Unlocking lock %s %x\n", lockname, lkid);
+		DEBUGLOG("Cleanup (%p): Unlocking lock %s %x\n", client, lockname, lkid);
 		(void) sync_unlock(lockname, lkid);
 	}

@@ -339,7 +340,6 @@ void cmd_client_cleanup(struct local_client *client)
 	client->bits.localsock.private = NULL;
 }

-
 static int restart_clvmd(void)
 {
 	const char **argv;
--- a/daemons/clvmd/clvmd-common.h
+++ b/daemons/clvmd/clvmd-common.h
@@ -18,15 +18,10 @@
 #ifndef _LVM_CLVMD_COMMON_H
 #define _LVM_CLVMD_COMMON_H

-#include "configure.h"
-
 #define _REENTRANT
-#define _GNU_SOURCE
-#define _FILE_OFFSET_BITS 64

-#include "libdevmapper.h"
+#include "tool.h"
+
 #include "lvm-logging.h"

-#include <unistd.h>
-
 #endif
--- a/daemons/clvmd/clvmd-openais.c
+++ b/daemons/clvmd/clvmd-openais.c
@@ -243,7 +243,7 @@ static void openais_cpg_confchg_callback(cpg_handle_t handle,
 	struct node_info *ninfo;

 	DEBUGLOG("confchg callback. %" PRIsize_t " joined, "
-		 "%" PRIsize_t " left, %" PRIsize_t " members\n",
+		 FMTsize_t " left, %" PRIsize_t " members\n",
 		 joined_list_entries, left_list_entries, member_list_entries);

 	for (i=0; i<joined_list_entries; i++) {
--- a/daemons/clvmd/clvmd-singlenode.c
+++ b/daemons/clvmd/clvmd-singlenode.c
@@ -208,8 +208,6 @@ static int _lock_resource(const char *resource, int mode, int flags, int *lockid
 	pthread_mutex_lock(&_lock_mutex);

 retry:
-	pthread_cond_broadcast(&_lock_cond); /* to wakeup waiters */
-
 	if (!(head = dm_hash_lookup(_locks, resource))) {
 		if (flags & LCKF_CONVERT) {
 			/* In real DLM, lock is identified only by lockid, resource is not used */
@@ -269,12 +267,14 @@ retry:
 		dm_list_add(head, &lck->list);
 	}
 out:
+	pthread_cond_broadcast(&_lock_cond); /* to wakeup waiters */
 	pthread_mutex_unlock(&_lock_mutex);
 	DEBUGLOG("Locked resource %s, lockid=%d, mode=%s\n",
 		 resource, lck->lockid, _get_mode(lck->mode));

 	return 0;
 bad:
+	pthread_cond_broadcast(&_lock_cond); /* to wakeup waiters */
 	pthread_mutex_unlock(&_lock_mutex);
 	DEBUGLOG("Failed to lock resource %s\n", resource);

--- a/daemons/clvmd/clvmd.c
+++ b/daemons/clvmd/clvmd.c
@@ -172,6 +172,7 @@ static void usage(const char *prog, FILE *file)
 #ifdef USE_SINGLENODE
 		"singlenode "
 #endif
+		"\n"
 		"   -R       Tell all running clvmds in the cluster to reload their device cache\n"
 		"   -S       Restart clvmd, preserving exclusive locks\n"
 		"   -t<secs> Command timeout (default: 60 seconds)\n"
@@ -222,6 +223,7 @@ void debuglog(const char *fmt, ...)
 		fprintf(stderr, "CLVMD[%x]: %.15s ", (int)pthread_self(), ctime_r(&P, buf_ctime) + 4);
 		vfprintf(stderr, fmt, ap);
 		va_end(ap);
+		fflush(stderr);
 		break;
 	case DEBUG_SYSLOG:
 		if (!syslog_init) {
@@ -597,7 +599,9 @@ int main(int argc, char *argv[])

 	/* This needs to be started after cluster initialisation
 	   as it may need to take out locks */
-	DEBUGLOG("starting LVM thread\n");
+	DEBUGLOG("Starting LVM thread\n");
+	DEBUGLOG("Main cluster socket fd %d (%p) with local socket %d (%p)\n",
+		 local_client_head.fd, &local_client_head, newfd->fd, newfd);

 	/* Don't let anyone else to do work until we are started */
 	pthread_create(&lvm_thread, &stack_attr, lvm_thread_fn, &lvm_params);
@@ -697,7 +701,7 @@ static int local_rendezvous_callback(struct local_client *thisfd, char *buf,
 		newfd->type = LOCAL_SOCK;
 		newfd->callback = local_sock_callback;
 		newfd->bits.localsock.all_success = 1;
-		DEBUGLOG("Got new connection on fd %d\n", newfd->fd);
+		DEBUGLOG("Got new connection on fd %d (%p)\n", newfd->fd, newfd);
 		*new_client = newfd;
 	}
 	return 1;
@@ -849,18 +853,48 @@ static void main_loop(int cmd_timeout)
 		struct local_client *thisfd;
 		struct timeval tv = { cmd_timeout, 0 };
 		int quorate = clops->is_quorate();
+		int client_count = 0;
+		int max_fd = 0;

 		/* Wait on the cluster FD and all local sockets/pipes */
 		local_client_head.fd = clops->get_main_cluster_fd();
 		FD_ZERO(&in);
+		struct local_client *lastfd = &local_client_head;
+		struct local_client *nextfd = local_client_head.next;
+
 		for (thisfd = &local_client_head; thisfd; thisfd = thisfd->next) {
+			client_count++;
+			max_fd = max(max_fd, thisfd->fd);
+		}
+
+		if (max_fd > FD_SETSIZE - 32) {
+			fprintf(stderr, "WARNING: There are too many connections to clvmd.  Investigate and take action now!\n");
+ 			fprintf(stderr, "WARNING: Your cluster may freeze up if the number of clvmd file descriptors (%d) exceeds %d.\n", max_fd + 1, FD_SETSIZE);
+		}
+
+		for (thisfd = &local_client_head; thisfd; thisfd = nextfd, nextfd = thisfd ? thisfd->next : NULL) {
+
+			if (thisfd->removeme && !cleanup_zombie(thisfd)) {
+				struct local_client *free_fd = thisfd;
+				lastfd->next = nextfd;
+				DEBUGLOG("removeme set for %p with %d monitored fds remaining\n", free_fd, client_count - 1);
+
+				/* Queue cleanup, this also frees the client struct */
+				add_to_lvmqueue(free_fd, NULL, 0, NULL);
+
+				continue;
+			}
+
+			lastfd = thisfd;
+
 			if (thisfd->removeme)
 				continue;

 			/* if the cluster is not quorate then don't listen for new requests */
 			if ((thisfd->type != LOCAL_RENDEZVOUS &&
 			     thisfd->type != LOCAL_SOCK) || quorate)
-				FD_SET(thisfd->fd, &in);
+				if (thisfd->fd < FD_SETSIZE)
+					FD_SET(thisfd->fd, &in);
 		}

 		select_status = select(FD_SETSIZE, &in, NULL, NULL, &tv);
@@ -876,31 +910,20 @@ static void main_loop(int cmd_timeout)
 		}

 		if (select_status > 0) {
-			struct local_client *lastfd = NULL;
 			char csid[MAX_CSID_LEN];
 			char buf[max_cluster_message];

 			for (thisfd = &local_client_head; thisfd; thisfd = thisfd->next) {
-				if (thisfd->removeme && !cleanup_zombie(thisfd)) {
-					struct local_client *free_fd = thisfd;
-					lastfd->next = thisfd->next;
-					DEBUGLOG("removeme set for fd %d\n", free_fd->fd);
-
-					/* Queue cleanup, this also frees the client struct */
-					add_to_lvmqueue(free_fd, NULL, 0, NULL);
-					break;
-				}
-
-				if (FD_ISSET(thisfd->fd, &in)) {
+				if (thisfd->fd < FD_SETSIZE && FD_ISSET(thisfd->fd, &in)) {
 					struct local_client *newfd = NULL;
 					int ret;

+					/* FIXME Remove from main thread in case it blocks! */
 					/* Do callback */
 					ret = thisfd->callback(thisfd, buf, sizeof(buf),
 							       csid, &newfd);
 					/* Ignore EAGAIN */
 					if (ret < 0 && (errno == EAGAIN || errno == EINTR)) {
-						lastfd = thisfd;
 						continue;
                                        }

@@ -916,17 +939,16 @@ static void main_loop(int cmd_timeout)
 						DEBUGLOG("ret == %d, errno = %d. removing client\n",
 							 ret, errno);
 						thisfd->removeme = 1;
-						break;
+						continue;
 					}

 					/* New client...simply add it to the list */
 					if (newfd) {
 						newfd->next = thisfd->next;
 						thisfd->next = newfd;
-						break;
+						thisfd = newfd;
 					}
 				}
-				lastfd = thisfd;
 			}
 		}

@@ -1419,7 +1441,7 @@ static int read_from_local_sock(struct local_client *thisfd)
 	thisfd->bits.localsock.in_progress = TRUE;
 	thisfd->bits.localsock.state = PRE_COMMAND;
 	thisfd->bits.localsock.cleanup_needed = 1;
-	DEBUGLOG("Creating pre&post thread\n");
+	DEBUGLOG("Creating pre&post thread for pipe fd %d (%p)\n", newfd->fd, newfd);
 	status = pthread_create(&thisfd->bits.localsock.threadid,
 				&stack_attr, pre_and_post_thread, thisfd);
 	DEBUGLOG("Created pre&post thread, state = %d\n", status);
@@ -1673,7 +1695,7 @@ static __attribute__ ((noreturn)) void *pre_and_post_thread(void *arg)
 	sigset_t ss;
 	int pipe_fd = client->bits.localsock.pipe;

-	DEBUGLOG("Pre&post thread (%p), pipe %d\n", client, pipe_fd);
+	DEBUGLOG("Pre&post thread (%p), pipe fd %d\n", client, pipe_fd);
 	pthread_mutex_lock(&client->bits.localsock.mutex);

 	/* Ignore SIGUSR1 (handled by master process) but enable
@@ -1693,7 +1715,7 @@ static __attribute__ ((noreturn)) void *pre_and_post_thread(void *arg)
 		if ((status = do_pre_command(client)))
 			client->bits.localsock.all_success = 0;

-		DEBUGLOG("Pre&post thread (%p) writes status %d down to pipe %d\n",
+		DEBUGLOG("Pre&post thread (%p) writes status %d down to pipe fd %d\n",
 			 client, status, pipe_fd);

 		/* Tell the parent process we have finished this bit */
@@ -1975,7 +1997,7 @@ static int process_work_item(struct lvm_thread_cmd *cmd)
 {
 	/* If msg is NULL then this is a cleanup request */
 	if (cmd->msg == NULL) {
-		DEBUGLOG("process_work_item: free fd %d\n", cmd->client->fd);
+		DEBUGLOG("process_work_item: free %p\n", cmd->client);
 		cmd_client_cleanup(cmd->client);
 		pthread_mutex_destroy(&cmd->client->bits.localsock.mutex);
 		pthread_cond_destroy(&cmd->client->bits.localsock.cond);
--- a/daemons/clvmd/lvm-functions.c
+++ b/daemons/clvmd/lvm-functions.c
@@ -510,7 +510,7 @@ int do_lock_lv(unsigned char command, unsigned char lock_flags, char *resource)
 	DEBUGLOG("do_lock_lv: resource '%s', cmd = %s, flags = %s, critical_section = %d\n",
 		 resource, decode_locking_cmd(command), decode_flags(lock_flags), critical_section());

-	if (!cmd->config_initialized || config_files_changed(cmd)) {
+	if (!cmd->initialized.config || config_files_changed(cmd)) {
 		/* Reinitialise various settings inc. logging, filters */
 		if (do_refresh_cache()) {
 			log_error("Updated config file invalid. Aborting.");
@@ -899,7 +899,7 @@ int init_clvm(struct dm_hash_table *excl_uuid)
 	if (!get_initial_state(excl_uuid))
 		log_error("Cannot load initial lock states.");

-	if (!(cmd = create_toolcontext(1, NULL, 0, 1))) {
+	if (!(cmd = create_toolcontext(1, NULL, 0, 1, 1, 1))) {
 		log_error("Failed to allocate command context");
 		return 0;
 	}
--- a/daemons/cmirrord/.gitignore
+++ b/daemons/cmirrord/.gitignore
@@ -0,0 +1 @@
+cmirrord
--- a/daemons/dmeventd/.gitignore
+++ b/daemons/dmeventd/.gitignore
@@ -0,0 +1 @@
+dmeventd
--- a/daemons/dmeventd/dmeventd.c
+++ b/daemons/dmeventd/dmeventd.c
@@ -16,26 +16,21 @@
 * dmeventd - dm event daemon to monitor active mapped devices
 */

-#define _GNU_SOURCE
-#define _FILE_OFFSET_BITS 64
+#include "tool.h"

-#include "configure.h"
-#include "libdevmapper.h"
-#include "libdevmapper-event.h"
-#include "dmeventd.h"
 //#include "libmultilog.h"
 #include "dm-logging.h"

-#include <stdarg.h>
+#include "libdevmapper-event.h"
+#include "dmeventd.h"
+
 #include <dlfcn.h>
-#include <errno.h>
 #include <pthread.h>
 #include <sys/file.h>
 #include <sys/stat.h>
 #include <sys/wait.h>
 #include <sys/time.h>
 #include <sys/resource.h>
-#include <unistd.h>
 #include <signal.h>
 #include <arpa/inet.h>		/* for htonl, ntohl */
 #include <fcntl.h>		/* for musl libc */
@@ -133,51 +128,20 @@ void debuglog(const char *fmt, ...)

 static const char *decode_cmd(uint32_t cmd)
 {
-	static char buf[128];
-	const char *command;
-
 	switch (cmd) {
-	case DM_EVENT_CMD_ACTIVE:
-		command = "ACTIVE";
-		break;
-	case DM_EVENT_CMD_REGISTER_FOR_EVENT:
-		command = "REGISTER_FOR_EVENT";
-		break;
-	case DM_EVENT_CMD_UNREGISTER_FOR_EVENT:
-		command = "UNREGISTER_FOR_EVENT";
-		break;
-	case DM_EVENT_CMD_GET_REGISTERED_DEVICE:
-		command = "GET_REGISTERED_DEVICE";
-		break;
-	case DM_EVENT_CMD_GET_NEXT_REGISTERED_DEVICE:
-		command = "GET_NEXT_REGISTERED_DEVICE";
-		break;
-	case DM_EVENT_CMD_SET_TIMEOUT:
-		command = "SET_TIMEOUT";
-		break;
-	case DM_EVENT_CMD_GET_TIMEOUT:
-		command = "GET_TIMEOUT";
-		break;
-	case DM_EVENT_CMD_HELLO:
-		command = "HELLO";
-		break;
-	case DM_EVENT_CMD_DIE:
-		command = "DIE";
-		break;
-	case DM_EVENT_CMD_GET_STATUS:
-		command = "GET_STATUS";
-		break;
-	case DM_EVENT_CMD_GET_PARAMETERS:
-		command = "GET_PARAMETERS";
-		break;
-	default:
-		command = "unknown";
-		break;
+	case DM_EVENT_CMD_ACTIVE:			return "ACTIVE";
+	case DM_EVENT_CMD_REGISTER_FOR_EVENT:		return "REGISTER_FOR_EVENT";
+	case DM_EVENT_CMD_UNREGISTER_FOR_EVENT:		return "UNREGISTER_FOR_EVENT";
+	case DM_EVENT_CMD_GET_REGISTERED_DEVICE:	return "GET_REGISTERED_DEVICE";
+	case DM_EVENT_CMD_GET_NEXT_REGISTERED_DEVICE:	return "GET_NEXT_REGISTERED_DEVICE";
+	case DM_EVENT_CMD_SET_TIMEOUT:			return "SET_TIMEOUT";
+	case DM_EVENT_CMD_GET_TIMEOUT:			return "GET_TIMEOUT";
+	case DM_EVENT_CMD_HELLO:			return "HELLO";
+	case DM_EVENT_CMD_DIE:				return "DIE";
+	case DM_EVENT_CMD_GET_STATUS:			return "GET_STATUS";
+	case DM_EVENT_CMD_GET_PARAMETERS:		return "GET_PARAMETERS";
+	default:					return "unknown";
 	}
-
-	snprintf(buf, sizeof(buf), "%s (0x%x)", command, cmd);
-
-	return buf;
 }

 #else
@@ -710,6 +674,7 @@ static int _event_wait(struct thread_status *thread, struct dm_task **task)
 	int ret = DM_WAIT_RETRY;
 	struct dm_task *dmt;
 	struct dm_info info;
+	int ioctl_errno;

 	*task = 0;

@@ -739,25 +704,27 @@ static int _event_wait(struct thread_status *thread, struct dm_task **task)
 	 * either for a timeout event, or to cancel the thread.
 	 */
 	set = _unblock_sigalrm();
-	errno = 0;
 	if (dm_task_run(dmt)) {
 		thread->current_events |= DM_EVENT_DEVICE_ERROR;
 		ret = DM_WAIT_INTR;

 		if ((ret = dm_task_get_info(dmt, &info)))
 			thread->event_nr = info.event_nr;
-	} else if (thread->events & DM_EVENT_TIMEOUT && errno == EINTR) {
-		thread->current_events |= DM_EVENT_TIMEOUT;
-		ret = DM_WAIT_INTR;
-	} else if (thread->status == DM_THREAD_SHUTDOWN && errno == EINTR) {
-		ret = DM_WAIT_FATAL;
 	} else {
-		syslog(LOG_NOTICE, "dm_task_run failed, errno = %d, %s",
-		       errno, strerror(errno));
-		if (errno == ENXIO) {
-			syslog(LOG_ERR, "%s disappeared, detaching",
-			       thread->device.name);
+		ioctl_errno = dm_task_get_errno(dmt);
+		if (thread->events & DM_EVENT_TIMEOUT && ioctl_errno == EINTR) {
+			thread->current_events |= DM_EVENT_TIMEOUT;
+			ret = DM_WAIT_INTR;
+		} else if (thread->status == DM_THREAD_SHUTDOWN && ioctl_errno == EINTR)
 			ret = DM_WAIT_FATAL;
+		else {
+			syslog(LOG_NOTICE, "dm_task_run failed, errno = %d, %s",
+			       ioctl_errno, strerror(ioctl_errno));
+			if (ioctl_errno == ENXIO) {
+				syslog(LOG_ERR, "%s disappeared, detaching",
+				       thread->device.name);
+				ret = DM_WAIT_FATAL;
+			}
 		}
 	}
 	DEBUGLOG("Completed waitevent task for %s", thread->device.uuid);
@@ -1595,9 +1562,6 @@ static void _process_request(struct dm_event_fifos *fifos)
 {
 	int die;
 	struct dm_event_daemon_message msg = { 0 };
-#ifdef DEBUG
-	const char *cmd;
-#endif

 	/*
 	 * Read the request from the client (client_read, client_write
@@ -1606,7 +1570,8 @@ static void _process_request(struct dm_event_fifos *fifos)
 	if (!_client_read(fifos, &msg))
 		return;

-	DEBUGLOG("%s processing...", cmd = decode_cmd(msg.cmd));
+	DEBUGLOG("%s (0x%x) processing...", decode_cmd(msg.cmd), msg.cmd);
+
 	die = (msg.cmd == DM_EVENT_CMD_DIE) ? 1 : 0;

 	/* _do_process_request fills in msg (if memory allows for
@@ -1618,7 +1583,7 @@ static void _process_request(struct dm_event_fifos *fifos)

 	dm_free(msg.data);

-	DEBUGLOG("%s completed.", cmd);
+	DEBUGLOG("%s (0x%x) completed.", decode_cmd(msg.cmd), msg.cmd);

 	if (die) {
 		if (unlink(DMEVENTD_PIDFILE))
@@ -1668,10 +1633,8 @@ static void _cleanup_unused_threads(void)
 				if (ret == ESRCH) {
 					thread->status = DM_THREAD_DONE;
 				} else if (ret) {
-					syslog(LOG_ERR,
-					       "Unable to terminate thread: %s\n",
-					       strerror(-ret));
-					stack;
+					syslog(LOG_ERR, "Unable to terminate thread: %s",
+					       strerror(ret));
 				}
 				break;
 			}
@@ -1703,8 +1666,7 @@ static void _cleanup_unused_threads(void)

 static void _sig_alarm(int signum __attribute__((unused)))
 {
-	DEBUGLOG("Received SIGALRM.");
-	pthread_testcancel();
+	/* empty SIG_IGN */;
 }

 /* Init thread signal handling. */
--- a/daemons/dmeventd/libdevmapper-event.c
+++ b/daemons/dmeventd/libdevmapper-event.c
@@ -17,15 +17,10 @@
 //#include "libmultilog.h"
 #include "dmeventd.h"

-#include <errno.h>
 #include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
 #include <sys/file.h>
 #include <sys/types.h>
 #include <sys/stat.h>
-#include <unistd.h>
 #include <sys/wait.h>
 #include <arpa/inet.h>		/* for htonl, ntohl */

--- a/daemons/dmeventd/plugins/lvm2/dmeventd_lvm.c
+++ b/daemons/dmeventd/plugins/lvm2/dmeventd_lvm.c
@@ -102,7 +102,8 @@ int dmeventd_lvm2_init(void)
 		goto out;

 	if (!_lvm_handle) {
-		lvm2_log_fn(_temporary_log_fn);
+		if (!getenv("LVM_LOG_FILE_EPOCH"))
+			lvm2_log_fn(_temporary_log_fn);
 		if (!(_lvm_handle = lvm2_init())) {
 			dm_pool_destroy(_mem_pool);
 			_mem_pool = NULL;
--- a/daemons/lvmetad/.gitignore
+++ b/daemons/lvmetad/.gitignore
@@ -0,0 +1,2 @@
+lvmetad
+lvmetactl
--- a/daemons/lvmetad/Makefile.in
+++ b/daemons/lvmetad/Makefile.in
@@ -18,7 +18,7 @@ top_builddir = @top_builddir@
 SOURCES = lvmetad-core.c
 SOURCES2 = testclient.c

-TARGETS = lvmetad lvmetad-testclient
+TARGETS = lvmetad lvmetactl

 .PHONY: install_lvmetad

@@ -39,8 +39,11 @@ CFLAGS += $(EXTRA_EXEC_CFLAGS)

 lvmetad: $(OBJECTS) $(top_builddir)/libdaemon/client/libdaemonclient.a \
 		    $(top_builddir)/libdaemon/server/libdaemonserver.a
-	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJECTS) \
-	$(DL_LIBS) $(LVMLIBS) $(LIBS) -rdynamic
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJECTS) $(LVMLIBS) $(LIBS)
+
+lvmetactl: lvmetactl.o $(top_builddir)/libdaemon/client/libdaemonclient.a \
+	$(top_builddir)/libdaemon/server/libdaemonserver.a
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ lvmetactl.o $(LVMLIBS)

 # TODO: No idea. No idea how to test either.
 #ifneq ("$(CFLOW_CMD)", "")
--- a/daemons/lvmetad/lvmetactl.c
+++ b/daemons/lvmetad/lvmetactl.c
@@ -0,0 +1,216 @@
+/*
+ * Copyright (C) 2014 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ */
+
+#include "tool.h"
+
+#include "lvmetad-client.h"
+
+daemon_handle h;
+
+/*
+ * Print the "response", "status" and "reason" strings of a reply on one
+ * line; a field missing from the reply is printed as an empty string.
+ */
+static void print_reply(daemon_reply reply)
+{
+	const char *a = daemon_reply_str(reply, "response", NULL);
+	const char *b = daemon_reply_str(reply, "status", NULL);
+	const char *c = daemon_reply_str(reply, "reason", NULL);
+
+	printf("response \"%s\" status \"%s\" reason \"%s\"\n",
+	       a ? a : "", b ? b : "", c ? c : "");
+}
+
+/*
+ * Print the raw text of a reply.  reply.buffer.mem is passed to "%s", so a
+ * NULL buffer (presumably a failed request - confirm against daemon_send)
+ * is reported as "no reply" instead of being handed to printf.
+ */
+static void print_buffer(daemon_reply reply)
+{
+	if (reply.buffer.mem)
+		printf("%s\n", reply.buffer.mem);
+	else
+		printf("no reply\n");
+}
+
+/*
+ * Send a single request, selected by argv[1], to lvmetad and print the
+ * answer.  Returns -1 when arguments are missing, 0 otherwise.
+ */
+int main(int argc, char **argv)
+{
+	daemon_reply reply;
+	char *cmd;
+	char *uuid;
+	char *name;
+	int val;
+	int ver;
+	int ret = 0;
+
+	if (argc < 2) {
+		printf("lvmeta dump\n");
+		printf("lvmeta pv_list\n");
+		printf("lvmeta vg_list\n");
+		printf("lvmeta vg_lookup_name <name>\n");
+		printf("lvmeta vg_lookup_uuid <uuid>\n");
+		printf("lvmeta pv_lookup_uuid <uuid>\n");
+		printf("lvmeta set_global_invalid 0|1\n");
+		printf("lvmeta get_global_invalid\n");
+		printf("lvmeta set_vg_version <uuid> <version>\n");
+		printf("lvmeta vg_lock_type <uuid>\n");
+		return -1;
+	}
+
+	cmd = argv[1];
+
+	h = lvmetad_open(NULL);
+
+	if (!strcmp(cmd, "dump")) {
+		reply = daemon_send_simple(h, "dump",
+					   "token = %s", "skip",
+					   NULL);
+		print_buffer(reply);
+
+	} else if (!strcmp(cmd, "pv_list")) {
+		reply = daemon_send_simple(h, "pv_list",
+					   "token = %s", "skip",
+					   NULL);
+		print_buffer(reply);
+
+	} else if (!strcmp(cmd, "vg_list")) {
+		reply = daemon_send_simple(h, "vg_list",
+					   "token = %s", "skip",
+					   NULL);
+		print_buffer(reply);
+
+	} else if (!strcmp(cmd, "set_global_invalid")) {
+		if (argc < 3) {
+			printf("set_global_invalid 0|1\n");
+			ret = -1;
+			goto out_close;
+		}
+		val = atoi(argv[2]);
+
+		reply = daemon_send_simple(h, "set_global_info",
+					   "global_invalid = %d", val,
+					   "token = %s", "skip",
+					   NULL);
+		print_reply(reply);
+
+	} else if (!strcmp(cmd, "get_global_invalid")) {
+		reply = daemon_send_simple(h, "get_global_info",
+					   "token = %s", "skip",
+					   NULL);
+		print_buffer(reply);
+
+	} else if (!strcmp(cmd, "set_vg_version")) {
+		if (argc < 4) {
+			printf("set_vg_version <uuid> <ver>\n");
+			ret = -1;
+			goto out_close;
+		}
+		uuid = argv[2];
+		ver = atoi(argv[3]);
+
+		reply = daemon_send_simple(h, "set_vg_info",
+					   "uuid = %s", uuid,
+					   "version = %d", ver,
+					   "token = %s", "skip",
+					   NULL);
+		print_reply(reply);
+
+	} else if (!strcmp(cmd, "vg_lookup_name")) {
+		if (argc < 3) {
+			printf("vg_lookup_name <name>\n");
+			ret = -1;
+			goto out_close;
+		}
+		name = argv[2];
+
+		reply = daemon_send_simple(h, "vg_lookup",
+					   "name = %s", name,
+					   "token = %s", "skip",
+					   NULL);
+		print_buffer(reply);
+
+	} else if (!strcmp(cmd, "vg_lookup_uuid")) {
+		if (argc < 3) {
+			printf("vg_lookup_uuid <uuid>\n");
+			ret = -1;
+			goto out_close;
+		}
+		uuid = argv[2];
+
+		reply = daemon_send_simple(h, "vg_lookup",
+					   "uuid = %s", uuid,
+					   "token = %s", "skip",
+					   NULL);
+		print_buffer(reply);
+
+	} else if (!strcmp(cmd, "vg_lock_type")) {
+		struct dm_config_node *metadata;
+		const char *lock_type;
+
+		if (argc < 3) {
+			printf("vg_lock_type <uuid>\n");
+			ret = -1;
+			goto out_close;
+		}
+		uuid = argv[2];
+
+		reply = daemon_send_simple(h, "vg_lookup",
+					   "uuid = %s", uuid,
+					   "token = %s", "skip",
+					   NULL);
+
+		/* Without a parsed config tree there is nothing to search. */
+		if (!reply.cft) {
+			printf("no reply\n");
+			goto out;
+		}
+
+		metadata = dm_config_find_node(reply.cft->root, "metadata");
+		if (!metadata) {
+			printf("no metadata\n");
+			goto out;
+		}
+
+		lock_type = dm_config_find_str(metadata, "metadata/lock_type", NULL);
+		if (!lock_type) {
+			printf("no lock_type\n");
+			goto out;
+		}
+		printf("lock_type %s\n", lock_type);
+
+	} else if (!strcmp(cmd, "pv_lookup_uuid")) {
+		if (argc < 3) {
+			printf("pv_lookup_uuid <uuid>\n");
+			ret = -1;
+			goto out_close;
+		}
+		uuid = argv[2];
+
+		reply = daemon_send_simple(h, "pv_lookup",
+					   "uuid = %s", uuid,
+					   "token = %s", "skip",
+					   NULL);
+		print_buffer(reply);
+
+	} else {
+		printf("unknown command\n");
+		goto out_close;
+	}
+out:
+	daemon_reply_destroy(reply);
+out_close:
+	daemon_close(h);
+	return ret;
+}
--- a/daemons/lvmetad/lvmetad-core.c
+++ b/daemons/lvmetad/lvmetad-core.c
@@ -14,23 +14,114 @@

 #define _XOPEN_SOURCE 500  /* pthread */

-#include "configure.h"
+#define _REENTRANT
+
+#include "tool.h"
+
 #include "daemon-io.h"
-#include "config-util.h"
 #include "daemon-server.h"
 #include "daemon-log.h"
 #include "lvm-version.h"

 #include <assert.h>
 #include <pthread.h>
-#include <stdint.h>
-#include <unistd.h>
-
-#include <math.h>  /* fabs() */
-#include <float.h> /* DBL_EPSILON */

 #define LVMETAD_SOCKET DEFAULT_RUN_DIR "/lvmetad.socket"

+/*
+ * valid/invalid state of cached metadata
+ *
+ * Normally when using lvmetad, the state is kept up-to-date through a
+ * combination of notifications from clients and updates triggered by uevents.
+ * When using lvmlockd, the lvmetad state is expected to become out of
+ * date (invalid/stale) when other hosts make changes to the metadata on disk.
+ *
+ * To deal with this, the metadata cached in lvmetad can be flagged as invalid.
+ * This invalid flag is returned along with the metadata when read by a
+ * command.  The command can check for the invalid flag and decide that it
+ * should either use the stale metadata (uncommon), or read the latest metadata
+ * from disk rather than using the invalid metadata that was returned.  If the
+ * command reads the latest metadata from disk, it can choose to send it to
+ * lvmetad to update the cached copy and clear the invalid flag in lvmetad.
+ * Otherwise, the next command to read the metadata from lvmetad will also
+ * receive the invalid metadata with the invalid flag (and like the previous
+ * command, it too may choose to read the latest metadata from disk and can
+ * then also choose to update the lvmetad copy.)
+ *
+ * For purposes of tracking the invalid state, LVM metadata is considered
+ * to be either VG-specific or global.  VG-specific metadata is metadata
+ * that is isolated to a VG, such as the LVs it contains.  Global
+ * metadata is metadata that is not isolated to a single VG.  Global
+ * metadata includes:
+ * . the VG namespace (which VG names are used)
+ * . the set of orphan PVs (which PVs are in VGs and which are not)
+ * . properties of orphan PVs (the size of an orphan PV)
+ *
+ * If the metadata for a single VG becomes invalid, the VGFL_INVALID
+ * flag can be set in the vg_info struct for that VG.  If the global
+ * metadata becomes invalid, the GLFL_INVALID flag can be set in the
+ * lvmetad daemon state.
+ *
+ * If a command reads VG metadata and VGFL_INVALID is set, an
+ * extra config node called "vg_invalid" is added to the config
+ * data returned to the command.
+ *
+ * If a command reads global metadata and GLFL_INVALID is set, an
+ * extra config node called "global_invalid" is added to the
+ * config data returned to the command.
+ *
+ * If a command sees vg_invalid, and wants the latest VG metadata,
+ * it only needs to scan disks of the PVs in that VG.
+ * It can then use vg_update to send the latest metadata to lvmetad
+ * which clears the VGFL_INVALID flag.
+ *
+ * If a command sees global_invalid, and wants the latest metadata,
+ * it should scan all devices to update lvmetad, and then send
+ * lvmetad the "set_global_info global_invalid=0" message to clear
+ * GLFL_INVALID.
+ *
+ * (When rescanning devices to update lvmetad, the command must use
+ * the global filter cmd->lvmetad_filter so that it processes the same
+ * devices that are seen by lvmetad.)
+ *
+ * The lvmetad INVALID flags can be set by sending lvmetad the messages:
+ *
+ * . set_vg_info with the latest VG seqno.  If the VG seqno is larger
+ *   than the cached VG seqno, VGFL_INVALID is set for the VG.
+ *
+ * . set_global_info with global_invalid=1 sets GLFL_INVALID.
+ *
+ * Different entities could use these functions to invalidate metadata
+ * if/when they detected that the cache is stale.  How they detect that
+ * the cache is stale depends on the details of the specific entity.
+ *
+ * In the case of lvmlockd, it embeds values into its locks to keep track
+ * of when other nodes have changed metadata on disk related to those locks.
+ * When acquiring locks it can look at these values and detect that
+ * the metadata associated with the lock has been changed.
+ * When the values change, it uses set_vg_info/set_global_info to
+ * invalidate the lvmetad cache.
+ *
+ * The values that lvmlockd distributes through its locks are the
+ * latest VG seqno in VG locks and a global counter in the global lock.
+ * When a host acquires a VG lock and sees that the embedded seqno is
+ * larger than it was previously, it knows that it should invalidate the
+ * lvmetad cache for the VG.  If the host acquires the global lock
+ * and sees that the counter is larger than previously, it knows that
+ * it should invalidate the global info in lvmetad.  This invalidation
+ * is done before the lock is returned to the command.  This way the
+ * invalid flag will be set on the metadata before the command reads
+ * it from lvmetad.
+ */
+
+struct vg_info {
+	int64_t external_version;
+	uint32_t flags; /* VGFL_ */
+};
+
+#define GLFL_INVALID 0x00000001
+#define VGFL_INVALID 0x00000001
+
 typedef struct {
 	log_state *log; /* convenience */
 	const char *log_config;
@@ -40,6 +131,8 @@ typedef struct {

 	struct dm_hash_table *vgid_to_metadata;
 	struct dm_hash_table *vgid_to_vgname;
+	struct dm_hash_table *vgid_to_outdated_pvs;
+	struct dm_hash_table *vgid_to_info;
 	struct dm_hash_table *vgname_to_vgid;
 	struct dm_hash_table *pvid_to_vgid;
 	struct {
@@ -50,6 +143,7 @@ typedef struct {
 		pthread_mutex_t pvid_to_vgid;
 	} lock;
 	char token[128];
+	uint32_t flags; /* GLFL_ */
 	pthread_mutex_t token_lock;
 } lvmetad_state;

@@ -60,17 +154,19 @@ static void destroy_metadata_hashes(lvmetad_state *s)
 	dm_hash_iterate(n, s->vgid_to_metadata)
 		dm_config_destroy(dm_hash_get_data(s->vgid_to_metadata, n));

+	dm_hash_iterate(n, s->vgid_to_outdated_pvs)
+		dm_config_destroy(dm_hash_get_data(s->vgid_to_outdated_pvs, n));
+
 	dm_hash_iterate(n, s->pvid_to_pvmeta)
 		dm_config_destroy(dm_hash_get_data(s->pvid_to_pvmeta, n));

 	dm_hash_destroy(s->pvid_to_pvmeta);
 	dm_hash_destroy(s->vgid_to_metadata);
 	dm_hash_destroy(s->vgid_to_vgname);
+	dm_hash_destroy(s->vgid_to_outdated_pvs);
+	dm_hash_destroy(s->vgid_to_info);
 	dm_hash_destroy(s->vgname_to_vgid);

-	dm_hash_iterate(n, s->device_to_pvid)
-		dm_free(dm_hash_get_data(s->device_to_pvid, n));
-
 	dm_hash_destroy(s->device_to_pvid);
 	dm_hash_destroy(s->pvid_to_vgid);
 }
@@ -81,6 +177,8 @@ static void create_metadata_hashes(lvmetad_state *s)
 	s->device_to_pvid = dm_hash_create(32);
 	s->vgid_to_metadata = dm_hash_create(32);
 	s->vgid_to_vgname = dm_hash_create(32);
+	s->vgid_to_outdated_pvs = dm_hash_create(32);
+	s->vgid_to_info = dm_hash_create(32);
 	s->pvid_to_vgid = dm_hash_create(32);
 	s->vgname_to_vgid = dm_hash_create(32);
 }
@@ -244,6 +342,30 @@ static int update_pv_status(lvmetad_state *s,
 	return complete;
 }

+/*
+ * Append an empty node called node_name after the last sibling of cft->root.
+ * Returns the new node, or NULL if dm_config_create_node() fails.
+ */
+static struct dm_config_node *add_last_node(struct dm_config_tree *cft, const char *node_name)
+{
+	struct dm_config_node *tail = cft->root, *node;
+
+	/* Find the end of the root-level sibling chain. */
+	while (tail->sib)
+		tail = tail->sib;
+
+	if (!(node = dm_config_create_node(cft, node_name)))
+		return NULL;
+
+	/* No value, no following sibling; parent is recorded as the root node. */
+	node->v = NULL;
+	node->sib = NULL;
+	node->parent = cft->root;
+	tail->sib = node;
+
+	return node;
+}
+
 static struct dm_config_node *make_pv_node(lvmetad_state *s, const char *pvid,
 					   struct dm_config_tree *cft,
 					   struct dm_config_node *parent,
@@ -307,6 +429,9 @@ static response pv_list(lvmetad_state *s, request r)
 		cn = make_pv_node(s, id, res.cft, cn_pvs, cn);
 	}

+	if (s->flags & GLFL_INVALID)
+		add_last_node(res.cft, "global_invalid");
+
 	unlock_pvid_to_pvmeta(s);

 	return res;
@@ -351,6 +476,9 @@ static response pv_lookup(lvmetad_state *s, request r)
 	pv->key = "physical_volume";
 	unlock_pvid_to_pvmeta(s);

+	if (s->flags & GLFL_INVALID)
+		add_last_node(res.cft, "global_invalid");
+
 	return res;
 }

@@ -419,14 +547,87 @@ static response vg_list(lvmetad_state *s, request r)
 	}

 	unlock_vgid_to_metadata(s);
+
+	if (s->flags & GLFL_INVALID)
+		add_last_node(res.cft, "global_invalid");
 bad:
 	return res;
 }

+/*
+ * Add pvid to the "outdated_pvs/pv_list" kept for vgid, creating the list on first use.
+ * Skipped when the PV has no cached pvmeta or neither mda0 nor mda1 has ignore=0.
+ */
+static void mark_outdated_pv(lvmetad_state *s, const char *vgid, const char *pvid)
+{
+	struct dm_config_tree *pvmeta, *outdated_pvs;
+	struct dm_config_node *list, *cft_vgid;
+	struct dm_config_value *v;
+
+	lock_pvid_to_pvmeta(s);
+	pvmeta = dm_hash_lookup(s->pvid_to_pvmeta, pvid);
+	unlock_pvid_to_pvmeta(s);
+
+	/* if the MDA exists and is used, it will have ignore=0 set */
+	if (!pvmeta ||
+	    (dm_config_find_int64(pvmeta->root, "pvmeta/mda0/ignore", 1) &&
+	     dm_config_find_int64(pvmeta->root, "pvmeta/mda1/ignore", 1)))
+		return;
+
+	WARN(s, "PV %s has outdated metadata", pvid);
+
+	if (!(outdated_pvs = dm_hash_lookup(s->vgid_to_outdated_pvs, vgid))) {
+		if (!(outdated_pvs = dm_config_from_string("outdated_pvs/pv_list = []")) ||
+		    !(cft_vgid = make_text_node(outdated_pvs, "vgid", dm_pool_strdup(outdated_pvs->mem, vgid),
+						outdated_pvs->root, NULL)) ||
+		    !dm_hash_insert(s->vgid_to_outdated_pvs, cft_vgid->v->v.str, outdated_pvs))
+			abort();
+		DEBUGLOG(s, "created outdated_pvs list for VG %s", vgid);
+	}
+
+	if (!(list = dm_config_find_node(outdated_pvs->root, "outdated_pvs/pv_list")))
+		abort(); /* a missing list node would otherwise be dereferenced below */
+	for (v = list->v; v; v = v->next)
+		if (v->type != DM_CFG_EMPTY_ARRAY && !strcmp(v->v.str, pvid))
+			return; /* already listed */
+	if (!(v = dm_config_create_value(outdated_pvs)) ||
+	    !(v->v.str = dm_pool_strdup(outdated_pvs->mem, pvid)))
+		abort(); /* never store a NULL string: the loop above strcmp()s it */
+	v->type = DM_CFG_STRING;
+	v->next = list->v;
+	list->v = v;
+}
+
+/* Insert an "outdated_pvs" node at the head of metadata's children and fill it
+ * with clones of the cached pvmeta of each PV listed as outdated for vgid. */
+static void chain_outdated_pvs(lvmetad_state *s, const char *vgid, struct dm_config_tree *metadata_cft, struct dm_config_node *metadata)
+{
+	struct dm_config_tree *outdated = dm_hash_lookup(s->vgid_to_outdated_pvs, vgid), *pvmeta;
+	struct dm_config_node *holder, *clone, *list = outdated ? dm_config_find_node(outdated->root, "outdated_pvs/pv_list") : NULL;
+	struct dm_config_value *val = list ? list->v : NULL;
+
+	if (!val || !(holder = make_config_node(metadata_cft, "outdated_pvs", metadata_cft->root, 0)))
+		return; /* nothing listed, or the node could not be created */
+	holder->sib = metadata->child;
+	metadata->child = holder;
+	for (; val && val->type != DM_CFG_EMPTY_ARRAY; val = val->next) {
+		if (!(pvmeta = dm_hash_lookup(s->pvid_to_pvmeta, val->v.str))) {
+			WARN(s, "metadata for PV %s not found", val->v.str);
+			continue;
+		}
+		if ((clone = dm_config_clone_node(metadata_cft, pvmeta->root, 0))) {
+			clone->key = dm_config_find_str(clone, "pvmeta/id", NULL);
+			clone->sib = holder->child;
+			holder->child = clone;
+		}
+	}
+}
+
 static response vg_lookup(lvmetad_state *s, request r)
 {
 	struct dm_config_tree *cft;
 	struct dm_config_node *metadata, *n;
+	struct vg_info *info;
 	response res = { 0 };

 	const char *uuid = daemon_request_str(r, "uuid", NULL);
@@ -489,6 +690,17 @@ static response vg_lookup(lvmetad_state *s, request r)
 	unlock_vg(s, uuid);

 	update_pv_status(s, res.cft, n, 1); /* FIXME report errors */
+	chain_outdated_pvs(s, uuid, res.cft, n);
+
+        if (s->flags & GLFL_INVALID)
+                add_last_node(res.cft, "global_invalid");
+
+	info = dm_hash_lookup(s->vgid_to_info, uuid);
+	if (info && (info->flags & VGFL_INVALID)) {
+		n = add_last_node(res.cft, "vg_invalid");
+		if (!n)
+			goto bad;
+	}

 	return res;
 bad:
@@ -496,65 +708,13 @@ bad:
 	return reply_fail("out of memory");
 }

-/* Test if the doubles are close enough to be considered equal */
-static int close_enough(double d1, double d2)
-{
-	return fabs(d1 - d2) < DBL_EPSILON;
-}
-
-static int compare_value(struct dm_config_value *a, struct dm_config_value *b)
-{
-	int r = 0;
-
-	if (a->type > b->type)
-		return 1;
-	if (a->type < b->type)
-		return -1;
-
-	switch (a->type) {
-	case DM_CFG_STRING: r = strcmp(a->v.str, b->v.str); break;
-	case DM_CFG_FLOAT: r = close_enough(a->v.f, b->v.f) ? 0 : (a->v.f > b->v.f) ? 1 : -1; break;
-	case DM_CFG_INT: r = (a->v.i == b->v.i) ? 0 : (a->v.i > b->v.i) ? 1 : -1; break;
-	case DM_CFG_EMPTY_ARRAY: return 0;
-	}
-
-	if (r == 0 && a->next && b->next)
-		r = compare_value(a->next, b->next);
-	return r;
-}
-
-static int compare_config(struct dm_config_node *a, struct dm_config_node *b)
-{
-	int result = 0;
-	if (a->v && b->v)
-		result = compare_value(a->v, b->v);
-	if (a->v && !b->v)
-		result = 1;
-	if (!a->v && b->v)
-		result = -1;
-	if (a->child && b->child)
-		result = compare_config(a->child, b->child);
-
-	if (result) {
-		// DEBUGLOG("config inequality at %s / %s", a->key, b->key);
-		return result;
-	}
-
-	if (a->sib && b->sib)
-		result = compare_config(a->sib, b->sib);
-	if (a->sib && !b->sib)
-		result = 1;
-	if (!a->sib && b->sib)
-		result = -1;
-
-	return result;
-}
-
 static int vg_remove_if_missing(lvmetad_state *s, const char *vgid, int update_pvids);

+enum update_pvid_mode { UPDATE_ONLY, REMOVE_EMPTY, MARK_OUTDATED };
+
 /* You need to be holding the pvid_to_vgid lock already to call this. */
 static int update_pvid_to_vgid(lvmetad_state *s, struct dm_config_tree *vg,
-			       const char *vgid, int nuke_empty)
+			       const char *vgid, int mode)
 {
 	struct dm_config_node *pv;
 	struct dm_hash_table *to_check;
@@ -574,11 +734,14 @@ static int update_pvid_to_vgid(lvmetad_state *s, struct dm_config_tree *vg,
 		if (!(pvid = dm_config_find_str(pv->child, "id", NULL)))
 			continue;

-		if (nuke_empty &&
+		if (mode == REMOVE_EMPTY &&
 		    (vgid_old = dm_hash_lookup(s->pvid_to_vgid, pvid)) &&
 		    !dm_hash_insert(to_check, vgid_old, (void*) 1))
 			goto out;

+		if (mode == MARK_OUTDATED)
+			mark_outdated_pv(s, vgid, pvid);
+
 		if (!dm_hash_insert(s->pvid_to_vgid, pvid, (void*) vgid))
 			goto out;

@@ -602,10 +765,11 @@ static int update_pvid_to_vgid(lvmetad_state *s, struct dm_config_tree *vg,
 /* A pvid map lock needs to be held if update_pvids = 1. */
 static int remove_metadata(lvmetad_state *s, const char *vgid, int update_pvids)
 {
-	struct dm_config_tree *old;
+	struct dm_config_tree *old, *outdated_pvs;
 	const char *oldname;
 	lock_vgid_to_metadata(s);
 	old = dm_hash_lookup(s->vgid_to_metadata, vgid);
+	outdated_pvs = dm_hash_lookup(s->vgid_to_outdated_pvs, vgid);
 	oldname = dm_hash_lookup(s->vgid_to_vgname, vgid);

 	if (!old) {
@@ -619,12 +783,15 @@ static int remove_metadata(lvmetad_state *s, const char *vgid, int update_pvids)
 	dm_hash_remove(s->vgid_to_metadata, vgid);
 	dm_hash_remove(s->vgid_to_vgname, vgid);
 	dm_hash_remove(s->vgname_to_vgid, oldname);
+	dm_hash_remove(s->vgid_to_outdated_pvs, vgid);
 	unlock_vgid_to_metadata(s);

 	if (update_pvids)
 		/* FIXME: What should happen when update fails */
 		update_pvid_to_vgid(s, old, "#orphan", 0);
 	dm_config_destroy(old);
+	if (outdated_pvs)
+		dm_config_destroy(outdated_pvs);
 	return 1;
 }

@@ -668,7 +835,7 @@ static int vg_remove_if_missing(lvmetad_state *s, const char *vgid, int update_p
 * this function, so they can be safely destroyed after update_metadata returns
 * (anything that might have been retained is copied). */
 static int update_metadata(lvmetad_state *s, const char *name, const char *_vgid,
-			   struct dm_config_node *metadata, int64_t *oldseq)
+			   struct dm_config_node *metadata, int64_t *oldseq, const char *pvid)
 {
 	struct dm_config_tree *cft = NULL;
 	struct dm_config_tree *old;
@@ -717,6 +884,10 @@ static int update_metadata(lvmetad_state *s, const char *name, const char *_vgid

 	if (seq < haveseq) {
 		DEBUGLOG(s, "Refusing to update metadata for %s (at %d) to %d", _vgid, haveseq, seq);
+
+		if (pvid)
+			mark_outdated_pv(s, dm_config_find_str(old->root, "metadata/id", NULL), pvid);
+
 		/* TODO: notify the client that their metadata is out of date? */
 		retval = 1;
 		goto out;
@@ -739,6 +910,8 @@ static int update_metadata(lvmetad_state *s, const char *name, const char *_vgid

 	if (haveseq >= 0 && haveseq < seq) {
 		INFO(s, "Updating metadata for %s at %d to %d", _vgid, haveseq, seq);
+		if (oldseq)
+			update_pvid_to_vgid(s, old, vgid, MARK_OUTDATED);
 		/* temporarily orphan all of our PVs */
 		update_pvid_to_vgid(s, old, "#orphan", 0);
 	}
@@ -773,12 +946,46 @@ out: /* FIXME: We should probably abort() on partial failures. */
 	return retval;
 }

+static dev_t device_remove(lvmetad_state *s, struct dm_config_tree *pvmeta, dev_t device)
+{
+	struct dm_config_node *pvmeta_tmp;
+	struct dm_config_value *v = NULL;
+	dev_t alt_device = 0, prim_device = 0;
+
+	if ((pvmeta_tmp = dm_config_find_node(pvmeta->root, "pvmeta/devices_alternate")))
+		v = pvmeta_tmp->v;
+
+	prim_device = dm_config_find_int64(pvmeta->root, "pvmeta/device", 0);
+
+	/* 'device' is the primary and an alternate exists: promote the first alternate into pvmeta/device */
+	if (device > 0 && device == prim_device && pvmeta_tmp && pvmeta_tmp->v)
+	{
+		alt_device = pvmeta_tmp->v->v.i;
+		pvmeta_tmp->v = pvmeta_tmp->v->next;
+		pvmeta_tmp = dm_config_find_node(pvmeta->root, "pvmeta/device");
+		pvmeta_tmp->v->v.i = alt_device;
+	} else if (device != prim_device)
+		alt_device = prim_device;
+
+	/* unlink 'device' from the alternates; NOTE(review): after the branch above pvmeta_tmp may point at pvmeta/device - confirm */
+	if (device > 0 && v && v->v.i == device)
+		pvmeta_tmp->v = v->next;
+	else while (device > 0 && pvmeta_tmp && v) {
+		if (v->next && v->next->v.i == device)
+			v->next = v->next->next;
+		v = v->next;
+	}
+
+	return alt_device;
+}
+
 static response pv_gone(lvmetad_state *s, request r)
 {
 	const char *pvid = daemon_request_str(r, "uuid", NULL);
 	int64_t device = daemon_request_int(r, "device", 0);
+	int64_t alt_device = 0;
 	struct dm_config_tree *pvmeta;
-	char *pvid_old, *vgid;
+	char *vgid;

 	DEBUGLOG(s, "pv_gone: %s / %" PRIu64, pvid, device);

@@ -792,15 +999,18 @@ static response pv_gone(lvmetad_state *s, request r)

 	DEBUGLOG(s, "pv_gone (updated): %s / %" PRIu64, pvid, device);

-	pvmeta = dm_hash_lookup(s->pvid_to_pvmeta, pvid);
-	pvid_old = dm_hash_lookup_binary(s->device_to_pvid, &device, sizeof(device));
+	if (!(pvmeta = dm_hash_lookup(s->pvid_to_pvmeta, pvid)))
+		return reply_unknown("PVID does not exist");
 	vgid = dm_hash_lookup(s->pvid_to_vgid, pvid);

 	dm_hash_remove_binary(s->device_to_pvid, &device, sizeof(device));
-	dm_hash_remove(s->pvid_to_pvmeta, pvid);
-	unlock_pvid_to_pvmeta(s);

-	dm_free(pvid_old);
+	if (!(alt_device = device_remove(s, pvmeta, device)))
+		dm_hash_remove(s->pvid_to_pvmeta, pvid);
+
+	DEBUGLOG(s, "pv_gone alt_device = %" PRIu64, alt_device);
+
+	unlock_pvid_to_pvmeta(s);

 	if (vgid) {
 		if (!(vgid = dm_strdup(vgid)))
@@ -812,12 +1022,15 @@ static response pv_gone(lvmetad_state *s, request r)
 		dm_free(vgid);
 	}

-	if (!pvmeta)
-		return reply_unknown("PVID does not exist");
+	if (!alt_device)
+		dm_config_destroy(pvmeta);

-	dm_config_destroy(pvmeta);
-
-	return daemon_reply_simple("OK", NULL);
+	if (alt_device) {
+		return daemon_reply_simple("OK",
+					   "device = %"PRId64, alt_device,
+					   NULL);
+	} else
+		return daemon_reply_simple("OK", NULL );
 }

 static response pv_clear_all(lvmetad_state *s, request r)
@@ -845,11 +1058,11 @@ static response pv_found(lvmetad_state *s, request r)
 	const char *vgname = daemon_request_str(r, "vgname", NULL);
 	const char *vgid = daemon_request_str(r, "metadata/id", NULL);
 	const char *vgid_old = NULL;
-	struct dm_config_node *pvmeta = dm_config_find_node(r.cft->root, "pvmeta");
+	struct dm_config_node *pvmeta = dm_config_find_node(r.cft->root, "pvmeta"), *altdev = NULL;
+	struct dm_config_value *altdev_v;
 	uint64_t device, device_old_pvid = 0;
 	struct dm_config_tree *cft, *pvmeta_old_dev = NULL, *pvmeta_old_pvid = NULL;
 	char *old;
-	char *pvid_dup;
 	int complete = 0, orphan = 0;
 	int64_t seqno = -1, seqno_old = -1, changed = 0;

@@ -861,12 +1074,8 @@ static response pv_found(lvmetad_state *s, request r)
 	if (!dm_config_get_uint64(pvmeta, "pvmeta/device", &device))
 		return reply_fail("need PV device number");

-	if (!(cft = dm_config_create()) ||
-	    (!(pvid_dup = dm_strdup(pvid)))) {
-		if (cft)
-			dm_config_destroy(cft);
+	if (!(cft = dm_config_create()))
 		return reply_fail("out of memory");
-	}

 	lock_pvid_to_pvmeta(s);

@@ -875,7 +1084,6 @@ static response pv_found(lvmetad_state *s, request r)

 	if ((old = dm_hash_lookup_binary(s->device_to_pvid, &device, sizeof(device)))) {
 		pvmeta_old_dev = dm_hash_lookup(s->pvid_to_pvmeta, old);
-		dm_hash_remove(s->pvid_to_pvmeta, old);
 		vgid_old = dm_hash_lookup(s->pvid_to_vgid, old);
 	}

@@ -885,35 +1093,69 @@ static response pv_found(lvmetad_state *s, request r)
 	if (!(cft->root = dm_config_clone_node(cft, pvmeta, 0)))
                goto out_of_mem;

+	pvid = dm_config_find_str(cft->root, "pvmeta/id", NULL);
+
 	if (!pvmeta_old_pvid || compare_config(pvmeta_old_pvid->root, cft->root))
 		changed |= 1;

 	if (pvmeta_old_pvid && device != device_old_pvid) {
-		DEBUGLOG(s, "pv %s no longer on device %" PRIu64, pvid, device_old_pvid);
-		dm_free(dm_hash_lookup_binary(s->device_to_pvid, &device_old_pvid, sizeof(device_old_pvid)));
+		DEBUGLOG(s, "PV %s duplicated on device %" PRIu64, pvid, device_old_pvid);
 		dm_hash_remove_binary(s->device_to_pvid, &device_old_pvid, sizeof(device_old_pvid));
+		if (!dm_hash_insert_binary(s->device_to_pvid, &device_old_pvid,
+					   sizeof(device_old_pvid), (void*)pvid))
+			goto out_of_mem;
+		if ((altdev = dm_config_find_node(pvmeta_old_pvid->root, "pvmeta/devices_alternate"))) {
+			altdev = dm_config_clone_node(cft, altdev, 0);
+			chain_node(altdev, cft->root, 0);
+		} else
+			if (!(altdev = make_config_node(cft, "devices_alternate", cft->root, 0)))
+				goto out_of_mem;
+                altdev_v = altdev->v;
+                while (1) {
+			if (altdev_v && altdev_v->v.i == device_old_pvid)
+				break;
+			if (altdev_v)
+				altdev_v = altdev_v->next;
+			if (!altdev_v) {
+				if (!(altdev_v = dm_config_create_value(cft)))
+					goto out_of_mem;
+				altdev_v->next = altdev->v;
+				altdev->v = altdev_v;
+				altdev->v->v.i = device_old_pvid;
+				break;
+			}
+		};
+		altdev_v = altdev->v;
+		while (altdev_v) {
+			if (altdev_v->next && altdev_v->next->v.i == device)
+				altdev_v->next = altdev_v->next->next;
+			altdev_v = altdev_v->next;
+		}
 		changed |= 1;
 	}

 	if (!dm_hash_insert(s->pvid_to_pvmeta, pvid, cft) ||
-	    !dm_hash_insert_binary(s->device_to_pvid, &device, sizeof(device), (void*)pvid_dup)) {
+	    !dm_hash_insert_binary(s->device_to_pvid, &device, sizeof(device), (void*)pvid)) {
 		dm_hash_remove(s->pvid_to_pvmeta, pvid);
 out_of_mem:
 		unlock_pvid_to_pvmeta(s);
 		dm_config_destroy(cft);
-		dm_free(pvid_dup);
 		dm_free(old);
 		return reply_fail("out of memory");
 	}

 	unlock_pvid_to_pvmeta(s);

-	dm_free(old);
-
 	if (pvmeta_old_pvid)
 		dm_config_destroy(pvmeta_old_pvid);
-	if (pvmeta_old_dev && pvmeta_old_dev != pvmeta_old_pvid)
-		dm_config_destroy(pvmeta_old_dev);
+	if (pvmeta_old_dev && pvmeta_old_dev != pvmeta_old_pvid) {
+		dev_t d = dm_config_find_int64(pvmeta_old_dev->root, "pvmeta/device", 0);
+		WARN(s, "pv_found: stray device %"PRId64, d);
+		if (!device_remove(s, pvmeta_old_dev, device)) {
+			dm_hash_remove(s->pvid_to_pvmeta, old);
+			dm_config_destroy(pvmeta_old_dev);
+		}
+	}

 	if (metadata) {
 		if (!vgid)
@@ -924,7 +1166,7 @@ out_of_mem:
 		if (daemon_request_int(r, "metadata/seqno", -1) < 0)
 			return reply_fail("need VG seqno");

-		if (!update_metadata(s, vgname, vgid, metadata, &seqno_old))
+		if (!update_metadata(s, vgname, vgid, metadata, &seqno_old, pvid))
 			return reply_fail("metadata update failed");
 		changed |= (seqno_old != dm_config_find_int(metadata, "metadata/seqno", -1));
 	} else {
@@ -972,6 +1214,39 @@ out_of_mem:
 				   NULL);
 }

+/*
+ * Handle the "vg_clear_outdated_pvs" request: destroy and forget the config
+ * tree stored in vgid_to_outdated_pvs for the request's "vgid", if there is
+ * one.  Replies with a failure when "vgid" is missing, OK otherwise.
+ */
+static response vg_clear_outdated_pvs(lvmetad_state *s, request r)
+{
+	const char *vgid = daemon_request_str(r, "vgid", NULL);
+	struct dm_config_tree *stale;
+
+	if (!vgid)
+		return reply_fail("need VG UUID");
+
+	stale = dm_hash_lookup(s->vgid_to_outdated_pvs, vgid);
+	if (stale) {
+		dm_config_destroy(stale);
+		dm_hash_remove(s->vgid_to_outdated_pvs, vgid);
+	}
+
+	return daemon_reply_simple("OK", NULL);
+}
+
+/*
+ * Clear VGFL_INVALID on the vg_info stored for 'uuid' once the
+ * "metadata/seqno" found in 'metadata' is at least the recorded
+ * external_version.  Nothing happens when the metadata has no seqno or
+ * when no vg_info entry exists for the VG.
+ */
+static void vg_info_update(lvmetad_state *s, const char *uuid,
+                           struct dm_config_node *metadata)
+{
+	const int64_t seqno = dm_config_find_int64(metadata, "metadata/seqno", -1);
+	struct vg_info *vgi;
+
+	if (seqno == -1)
+		return;
+
+	vgi = (struct vg_info *) dm_hash_lookup(s->vgid_to_info, uuid);
+	if (vgi && seqno >= vgi->external_version)
+		vgi->flags &= ~VGFL_INVALID;
+}
+
 static response vg_update(lvmetad_state *s, request r)
 {
 	struct dm_config_node *metadata = dm_config_find_node(r.cft->root, "metadata");
@@ -987,8 +1262,10 @@ static response vg_update(lvmetad_state *s, request r)

 		/* TODO defer metadata update here; add a separate vg_commit
 		 * call; if client does not commit, die */
-		if (!update_metadata(s, vgname, vgid, metadata, NULL))
+		if (!update_metadata(s, vgname, vgid, metadata, NULL, NULL))
 			return reply_fail("metadata update failed");
+
+		vg_info_update(s, vgid, metadata);
 	}
 	return daemon_reply_simple("OK", NULL);
 }
@@ -1009,6 +1286,71 @@ static response vg_remove(lvmetad_state *s, request r)
 	return daemon_reply_simple("OK", NULL);
 }

+/*
+ * Handle "set_global_info": a "global_invalid" of 1 sets GLFL_INVALID in
+ * the daemon state, 0 clears it, and any other value (including a missing
+ * field, which reads as -1) leaves the flag untouched.  Always replies OK.
+ */
+static response set_global_info(lvmetad_state *s, request r)
+{
+	const int wanted = daemon_request_int(r, "global_invalid", -1);
+
+	switch (wanted) {
+	case 1:
+		s->flags |= GLFL_INVALID;
+		break;
+	case 0:
+		s->flags &= ~GLFL_INVALID;
+		break;
+	default:
+		break;
+	}
+
+	return daemon_reply_simple("OK", NULL);
+}
+
+/*
+ * Handle "get_global_info": report whether GLFL_INVALID is currently set
+ * in the daemon state as "global_invalid = 0|1".
+ */
+static response get_global_info(lvmetad_state *s, request r)
+{
+	const int invalid = (s->flags & GLFL_INVALID) ? 1 : 0;
+
+	return daemon_reply_simple("OK", "global_invalid = %d", invalid, NULL);
+}
+
+/*
+ * Handle "set_vg_info": note that the VG named by the request's "uuid" has
+ * reached "version" outside this cache and mark the cached copy
+ * VGFL_INVALID when it is older than that.
+ *
+ * A version of 0 invalidates unconditionally.  The request is accepted
+ * without any change (reply OK) when "uuid" or "version" is missing, when
+ * the VG is not present in vgid_to_metadata, or when the cached
+ * "metadata/seqno" is already >= version.  Replies "out of memory" when a
+ * new vg_info cannot be allocated or stored.
+ */
+static response set_vg_info(lvmetad_state *s, request r)
+{
+	struct dm_config_tree *vg;
+	struct vg_info *info;
+	const char *uuid = daemon_request_str(r, "uuid", NULL);
+	const int64_t new_version = daemon_request_int(r, "version", -1);
+	int64_t cache_version;
+
+	if (!uuid)
+		goto out;
+
+	if (new_version == -1)
+		goto out;
+
+	vg = dm_hash_lookup(s->vgid_to_metadata, uuid);
+	if (!vg)
+		goto out;
+
+	if (!new_version)
+		goto inval;
+
+	cache_version = dm_config_find_int64(vg->root, "metadata/seqno", -1);
+
+	/* new_version == -1 has already been rejected above. */
+	if (cache_version != -1 && cache_version >= new_version)
+		goto out;
+inval:
+	info = dm_hash_lookup(s->vgid_to_info, uuid);
+	if (!info) {
+		info = malloc(sizeof(struct vg_info));
+		if (!info)
+			goto bad;
+		memset(info, 0, sizeof(struct vg_info));
+		if (!dm_hash_insert(s->vgid_to_info, uuid, (void*)info)) {
+			/* The table did not take the entry; do not leak it. */
+			free(info);
+			goto bad;
+		}
+	}
+
+	info->external_version = new_version;
+	info->flags |= VGFL_INVALID;
+
+out:
+	return daemon_reply_simple("OK", NULL);
+bad:
+	return reply_fail("out of memory");
+}
+
 static void _dump_cft(struct buffer *buf, struct dm_hash_table *ht, const char *key_addr)
 {
 	struct dm_hash_node *n;
@@ -1046,6 +1388,52 @@ static void _dump_pairs(struct buffer *buf, struct dm_hash_table *ht, const char
 	buffer_append(buf, "}\n");
 }

+/*
+ * Append a "<name> { ... }" section to 'buf' with one "key = version" line
+ * per entry of 'ht', where version is the entry's vg_info external_version.
+ * 'int_key' is not used by this function.
+ */
+static void _dump_info_version(struct buffer *buf, struct dm_hash_table *ht, const char *name, int int_key)
+{
+	struct dm_hash_node *node;
+	struct vg_info *vgi;
+	char *text;
+
+	buffer_append(buf, name);
+	buffer_append(buf, " {\n");
+
+	for (node = dm_hash_get_first(ht); node; node = dm_hash_get_next(ht, node)) {
+		vgi = dm_hash_get_data(ht, node);
+		buffer_append(buf, "    ");
+		/* NOTE(review): relies on dm_asprintf leaving 'text' NULL on failure - confirm. */
+		(void) dm_asprintf(&text, "%s = %lld", dm_hash_get_key(ht, node),
+				   (long long)vgi->external_version);
+		if (text)
+			buffer_append(buf, text);
+		buffer_append(buf, "\n");
+		dm_free(text);
+	}
+
+	buffer_append(buf, "}\n");
+}
+
+/*
+ * Append a "<name> { ... }" section to 'buf' with one "key = flags" line
+ * per entry of 'ht', where flags is the entry's vg_info flags word printed
+ * in hexadecimal.  'int_key' is not used by this function.
+ */
+static void _dump_info_flags(struct buffer *buf, struct dm_hash_table *ht, const char *name, int int_key)
+{
+	struct dm_hash_node *node;
+	struct vg_info *vgi;
+	char *text;
+
+	buffer_append(buf, name);
+	buffer_append(buf, " {\n");
+
+	for (node = dm_hash_get_first(ht); node; node = dm_hash_get_next(ht, node)) {
+		vgi = dm_hash_get_data(ht, node);
+		buffer_append(buf, "    ");
+		/* NOTE(review): relies on dm_asprintf leaving 'text' NULL on failure - confirm. */
+		(void) dm_asprintf(&text, "%s = %llx", dm_hash_get_key(ht, node),
+				   (long long)vgi->flags);
+		if (text)
+			buffer_append(buf, text);
+		buffer_append(buf, "\n");
+		dm_free(text);
+	}
+
+	buffer_append(buf, "}\n");
+}
+
 static response dump(lvmetad_state *s)
 {
 	response res = { 0 };
@@ -1068,6 +1456,9 @@ static response dump(lvmetad_state *s)
 	buffer_append(b, "\n# VGID to VGNAME mapping\n\n");
 	_dump_pairs(b, s->vgid_to_vgname, "vgid_to_vgname", 0);

+	buffer_append(b, "\n# VGID to outdated PVs mapping\n\n");
+	_dump_cft(b, s->vgid_to_outdated_pvs, "outdated_pvs/vgid");
+
 	buffer_append(b, "\n# VGNAME to VGID mapping\n\n");
 	_dump_pairs(b, s->vgname_to_vgid, "vgname_to_vgid", 0);

@@ -1077,6 +1468,12 @@ static response dump(lvmetad_state *s)
 	buffer_append(b, "\n# DEVICE to PVID mapping\n\n");
 	_dump_pairs(b, s->device_to_pvid, "device_to_pvid", 1);

+	buffer_append(b, "\n# VGID to INFO version mapping\n\n");
+	_dump_info_version(b, s->vgid_to_info, "vgid_to_info", 0);
+
+	buffer_append(b, "\n# VGID to INFO flags mapping\n\n");
+	_dump_info_flags(b, s->vgid_to_info, "vgid_to_info", 0);
+
 	unlock_pvid_to_vgid(s);
 	unlock_pvid_to_pvmeta(s);
 	unlock_vgid_to_metadata(s);
@@ -1098,7 +1495,7 @@ static response handler(daemon_state s, client_handle h, request r)
 		return daemon_reply_simple("OK", NULL);
 	}

-	if (strcmp(token, state->token) && strcmp(rq, "dump")) {
+	if (strcmp(token, state->token) && strcmp(rq, "dump") && strcmp(token, "skip")) {
 		pthread_mutex_unlock(&state->token_lock);
 		return daemon_reply_simple("token_mismatch",
 					   "expected = %s", state->token,
@@ -1127,6 +1524,9 @@ static response handler(daemon_state s, client_handle h, request r)
 	if (!strcmp(rq, "vg_update"))
 		return vg_update(state, r);

+	if (!strcmp(rq, "vg_clear_outdated_pvs"))
+		return vg_clear_outdated_pvs(state, r);
+
 	if (!strcmp(rq, "vg_remove"))
 		return vg_remove(state, r);

@@ -1139,6 +1539,15 @@ static response handler(daemon_state s, client_handle h, request r)
 	if (!strcmp(rq, "vg_list"))
 		return vg_list(state, r);

+	if (!strcmp(rq, "set_global_info"))
+		return set_global_info(state, r);
+
+	if (!strcmp(rq, "get_global_info"))
+		return get_global_info(state, r);
+
+	if (!strcmp(rq, "set_vg_info"))
+		return set_vg_info(state, r);
+
 	if (!strcmp(rq, "dump"))
 		return dump(state);

--- a/daemons/lvmetad/testclient.c
+++ b/daemons/lvmetad/testclient.c
@@ -1,3 +1,18 @@
+/*
+ * Copyright (C) 2011-2014 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include "tool.h"
+
 #include "lvmetad-client.h"
 #include "label.h"
 #include "lvmcache.h"
@@ -109,7 +124,7 @@ int main(int argc, char **argv) {

 	if (argc > 1) {
 		int i;
-		struct cmd_context *cmd = create_toolcontext(0, NULL, 0, 0);
+		struct cmd_context *cmd = create_toolcontext(0, NULL, 0, 0, 1, 1);
 		for (i = 1; i < argc; ++i) {
 			const char *uuid = NULL;
 			scan(h, argv[i]);
--- a/daemons/lvmlockd/.gitignore
+++ b/daemons/lvmlockd/.gitignore
@@ -0,0 +1,2 @@
+lvmlockctl
+lvmlockd
--- a/daemons/lvmlockd/Makefile.in
+++ b/daemons/lvmlockd/Makefile.in
@@ -0,0 +1,66 @@
+#
+# Copyright (C) 2014-2015 Red Hat, Inc.
+#
+# This file is part of LVM2.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU Lesser General Public License v.2.1.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+top_builddir = @top_builddir@
+
+# The core daemon source is always built; a lock-manager backend is added
+# only when configure substituted "yes" for it.
+SOURCES = lvmlockd-core.c
+
+ifeq ("@BUILD_LOCKDSANLOCK@", "yes")
+  SOURCES += lvmlockd-sanlock.c
+endif
+
+ifeq ("@BUILD_LOCKDDLM@", "yes")
+  SOURCES += lvmlockd-dlm.c
+endif
+
+TARGETS = lvmlockd lvmlockctl
+
+# The install rules below are commands, not files, so declare both phony.
+.PHONY: install_lvmlockd install_lvmlockctl
+
+include $(top_builddir)/make.tmpl
+
+INCLUDES += -I$(top_srcdir)/libdaemon/server
+LVMLIBS = -ldaemonserver $(LVMINTERNAL_LIBS) -ldevmapper
+
+LIBS += $(PTHREAD_LIBS)
+
+# Link against the client library of each enabled lock manager.
+ifeq ("@BUILD_LOCKDSANLOCK@", "yes")
+  LIBS += -lsanlock_client
+endif
+
+ifeq ("@BUILD_LOCKDDLM@", "yes")
+  LIBS += -ldlm_lt
+endif
+
+LDFLAGS += -L$(top_builddir)/libdaemon/server
+CLDFLAGS += -L$(top_builddir)/libdaemon/server
+
+# The daemon links every object plus the lock-manager libraries in LIBS.
+lvmlockd: $(OBJECTS) $(top_builddir)/libdaemon/client/libdaemonclient.a \
+		    $(top_builddir)/libdaemon/server/libdaemonserver.a
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJECTS) $(LVMLIBS) $(LIBS)
+
+# The control tool is built from lvmlockctl.o alone and does not use LIBS.
+lvmlockctl: lvmlockctl.o $(top_builddir)/libdaemon/client/libdaemonclient.a \
+		    $(top_builddir)/libdaemon/server/libdaemonserver.a
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ lvmlockctl.o $(LVMLIBS)
+
+install_lvmlockd: lvmlockd
+	$(INSTALL_PROGRAM) -D $< $(sbindir)/$(<F)
+
+install_lvmlockctl: lvmlockctl
+	$(INSTALL_PROGRAM) -D $< $(sbindir)/$(<F)
+
+install_lvm2: install_lvmlockd install_lvmlockctl
+
+install: install_lvm2
--- a/daemons/lvmlockd/lvmlockctl.c
+++ b/daemons/lvmlockd/lvmlockctl.c
@@ -0,0 +1,745 @@
+/*
+ * Copyright (C) 2014-2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ */
+
+#include "tool.h"
+
+#include "lvmlockd-client.h"
+
+#include <stddef.h>
+#include <getopt.h>
+#include <signal.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <syslog.h>
+#include <sys/wait.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+static int quit = 0;
+static int info = 0;
+static int dump = 0;
+static int wait_opt = 0;
+static int force_opt = 0;
+static int kill_vg = 0;
+static int drop_vg = 0;
+static int gl_enable = 0;
+static int gl_disable = 0;
+static int stop_lockspaces = 0;
+static char *arg_vg_name = NULL;
+
+#define DUMP_SOCKET_NAME "lvmlockd-dump.sock"
+#define DUMP_BUF_SIZE (1024 * 1024)
+static char dump_buf[DUMP_BUF_SIZE+1];
+static int dump_len;
+static struct sockaddr_un dump_addr;
+static socklen_t dump_addrlen;
+
+daemon_handle _lvmlockd;
+
+#define log_error(fmt, args...) \
+do { \
+	printf(fmt "\n", ##args); \
+} while (0)
+
+#define MAX_LINE 512
+
+/* copied from lvmlockd-internal.h */
+#define MAX_NAME 64
+#define MAX_ARGS 64
+
+/*
+ * lvmlockd dumps the client info before the lockspaces,
+ * so we can look up client info when printing lockspace info.
+ */
+
+#define MAX_CLIENTS 100
+
+/* One lvmlockd client as parsed from an "info=client" dump line. */
+struct client_info {
+	uint32_t client_id;	/* value of the line's "id=" field */
+	int pid;		/* value of the line's "pid=" field */
+	char name[MAX_NAME+1];	/* value of the line's "name=" field */
+};
+
+static struct client_info clients[MAX_CLIENTS];
+static int num_clients;
+
+/*
+ * Parse one "info=client ..." dump line and append the client to the
+ * clients[] table so later lock/action lines can be matched to a pid and
+ * name.  Fields that fail to parse keep their zero defaults.  Lines beyond
+ * MAX_CLIENTS entries are dropped rather than written past the table.
+ */
+static void save_client_info(char *line)
+{
+	uint32_t pid = 0;
+	int fd = 0;
+	int pi = 0;
+	uint32_t client_id = 0;
+	char name[MAX_NAME+1] = { 0 };
+
+	/* clients[] is a fixed-size table; never index past its end. */
+	if (num_clients >= MAX_CLIENTS)
+		return;
+
+	/* The %64s width must stay <= MAX_NAME so name[] cannot overflow. */
+	sscanf(line, "info=client pid=%u fd=%d pi=%d id=%u name=%64s",
+	       &pid, &fd, &pi, &client_id, name);
+
+	clients[num_clients].client_id = client_id;
+	clients[num_clients].pid = pid;
+	strcpy(clients[num_clients].name, name);
+	num_clients++;
+}
+
+/*
+ * Look up 'client_id' among the clients saved so far.  On the first match
+ * copy its pid into *pid and its name into cl_name; when there is no match
+ * both outputs are left untouched.
+ */
+static void find_client_info(uint32_t client_id, uint32_t *pid, char *cl_name)
+{
+	const struct client_info *ci;
+
+	for (ci = clients; ci < clients + num_clients; ci++) {
+		if (ci->client_id != client_id)
+			continue;
+
+		*pid = ci->pid;
+		strcpy(cl_name, ci->name);
+		return;
+	}
+}
+
+static int first_ls = 1;
+
+/*
+ * Print the "VG" and "LS" summary lines for one "info=ls ..." dump line.
+ * A blank line separates each lockspace from the previous one.  Fields
+ * that fail to parse print as empty strings.
+ */
+static void format_info_ls(char *line)
+{
+	char ls_name[MAX_NAME+1] = { 0 };
+	char vg_name[MAX_NAME+1] = { 0 };
+	char vg_uuid[MAX_NAME+1] = { 0 };
+	char vg_sysid[MAX_NAME+1] = { 0 };
+	char lock_args[MAX_ARGS+1] = { 0 };
+	char lock_type[MAX_NAME+1] = { 0 };
+
+	/* The %64s widths must stay <= MAX_NAME / MAX_ARGS to bound every copy. */
+	sscanf(line, "info=ls ls_name=%64s vg_name=%64s vg_uuid=%64s vg_sysid=%64s vg_args=%64s lm_type=%64s",
+	       ls_name, vg_name, vg_uuid, vg_sysid, lock_args, lock_type);
+
+	if (!first_ls)
+		printf("\n");
+	first_ls = 0;
+
+	printf("VG %s lock_type=%s %s\n", vg_name, lock_type, vg_uuid);
+
+	printf("LS %s %s\n", lock_type, ls_name);
+}
+
+/*
+ * Print an "OP" line for one "info=ls_action ..." dump line, resolving the
+ * client id to the pid and name saved from the earlier client lines.  An
+ * unknown client prints as pid 0 with an empty name.
+ */
+static void format_info_ls_action(char *line)
+{
+	uint32_t client_id = 0;
+	char flags[MAX_NAME+1] = { 0 };
+	char version[MAX_NAME+1] = { 0 };
+	char op[MAX_NAME+1] = { 0 };
+	uint32_t pid = 0;
+	char cl_name[MAX_NAME+1] = { 0 };
+
+	/* The %64s widths must stay <= MAX_NAME to bound every copy. */
+	sscanf(line, "info=ls_action client_id=%u %64s %64s op=%64s",
+	       &client_id, flags, version, op);
+
+	find_client_info(client_id, &pid, cl_name);
+
+	printf("OP %s pid %u (%s)\n", op, pid, cl_name);
+}
+
+/*
+ * Handle one "info=r ..." (resource) dump line.
+ *
+ * When the resource mode is not "un", nothing is printed: the name and
+ * type are copied to r_name_out / r_type_out so the lk and action lines
+ * that follow can be printed against them.  When the mode is "un" no lk
+ * lines follow, so the unlocked state is printed here and the outputs are
+ * left untouched.
+ */
+static void format_info_r(char *line, char *r_name_out, char *r_type_out)
+{
+	char r_name[MAX_NAME+1] = { 0 };
+	char r_type[4] = { 0 };
+	char mode[4] = { 0 };
+	char sh_count[MAX_NAME+1] = { 0 };
+	uint32_t ver = 0;
+
+	/*
+	 * Widths bound each copy: %64s must stay <= MAX_NAME and %3s leaves
+	 * room for the terminator in the 4-byte type/mode buffers.
+	 */
+	sscanf(line, "info=r name=%64s type=%3s mode=%3s %64s version=%u",
+	       r_name, r_type, mode, sh_count, &ver);
+
+	/* when mode is not un, wait and print each lk line */
+
+	if (strcmp(mode, "un")) {
+		strcpy(r_name_out, r_name);
+		strcpy(r_type_out, r_type);
+		return;
+	}
+
+	/* when mode is un, there will be no lk lines, so print now */
+
+	if (!strcmp(r_type, "gl")) {
+		printf("LK GL un ver %u\n", ver);
+
+	} else if (!strcmp(r_type, "vg")) {
+		printf("LK VG un ver %u\n", ver);
+
+	} else if (!strcmp(r_type, "lv")) {
+		printf("LK LV un %s\n", r_name);
+	}
+}
+
+/*
+ * Print an "LK" line for one "info=lk ..." dump line, using the resource
+ * name and type remembered from the preceding resource line.  If no
+ * resource is pending, an error plus the raw line is printed instead.
+ */
+static void format_info_lk(char *line, char *r_name, char *r_type)
+{
+	char mode[4] = { 0 };
+	uint32_t ver = 0;
+	char flags[MAX_NAME+1] = { 0 };
+	uint32_t client_id = 0;
+	uint32_t pid = 0;
+	char cl_name[MAX_NAME+1] = { 0 };
+
+	if (!r_name[0] || !r_type[0]) {
+		printf("format_info_lk error r_name %s r_type %s\n", r_name, r_type);
+		printf("%s\n", line);
+		return;
+	}
+
+	/* %3s fits the 4-byte mode buffer; %64s must stay <= MAX_NAME. */
+	sscanf(line, "info=lk mode=%3s version=%u %64s client_id=%u",
+	       mode, &ver, flags, &client_id);
+
+	find_client_info(client_id, &pid, cl_name);
+
+	if (!strcmp(r_type, "gl")) {
+		printf("LK GL %s ver %u pid %u (%s)\n", mode, ver, pid, cl_name);
+
+	} else if (!strcmp(r_type, "vg")) {
+		printf("LK VG %s ver %u pid %u (%s)\n", mode, ver, pid, cl_name);
+
+	} else if (!strcmp(r_type, "lv")) {
+		printf("LK LV %s %s\n", mode, r_name);
+	}
+}
+
+/*
+ * Print one "info=r_action ..." dump line against the resource remembered
+ * from the preceding resource line.  Operations other than "lock" print as
+ * an "OP" line; pending lock requests print as an "LW" line for the
+ * resource type.  If no resource is pending, an error plus the raw line is
+ * printed instead.
+ */
+static void format_info_r_action(char *line, char *r_name, char *r_type)
+{
+	uint32_t client_id = 0;
+	char flags[MAX_NAME+1] = { 0 };
+	char version[MAX_NAME+1] = { 0 };
+	char op[MAX_NAME+1] = { 0 };
+	char rt[4] = { 0 };
+	char mode[4] = { 0 };
+	char lm[MAX_NAME+1] = { 0 };
+	char result[MAX_NAME+1] = { 0 };
+	char lm_rv[MAX_NAME+1] = { 0 };
+	uint32_t pid = 0;
+	char cl_name[MAX_NAME+1] = { 0 };
+
+	if (!r_name[0] || !r_type[0]) {
+		printf("format_info_r_action error r_name %s r_type %s\n", r_name, r_type);
+		printf("%s\n", line);
+		return;
+	}
+
+	/* %3s fits the 4-byte rt/mode buffers; %64s must stay <= MAX_NAME. */
+	sscanf(line, "info=r_action client_id=%u %64s %64s op=%64s rt=%3s mode=%3s %64s %64s %64s",
+	       &client_id, flags, version, op, rt, mode, lm, result, lm_rv);
+
+	find_client_info(client_id, &pid, cl_name);
+
+	if (strcmp(op, "lock")) {
+		/* Terminate the line, as the ls_action "OP" output does. */
+		printf("OP %s pid %u (%s)\n", op, pid, cl_name);
+		return;
+	}
+
+	if (!strcmp(r_type, "gl")) {
+		printf("LW GL %s ver %u pid %u (%s)\n", mode, 0, pid, cl_name);
+
+	} else if (!strcmp(r_type, "vg")) {
+		printf("LW VG %s ver %u pid %u (%s)\n", mode, 0, pid, cl_name);
+
+	} else if (!strcmp(r_type, "lv")) {
+		printf("LW LV %s %s\n", mode, r_name);
+	}
+}
+
+/*
+ * Dispatch one dump line to its formatter based on the leading "info=..."
+ * tag.  r_name/r_type carry the most recent resource between calls: they
+ * are cleared when a new resource line arrives and then reused by the lock
+ * and action lines that follow it.  Unrecognised lines print with "UN".
+ */
+static void format_info_line(char *line, char *r_name, char *r_type)
+{
+	static const char tag_structs[] = "info=structs ";
+	static const char tag_client[] = "info=client ";
+	static const char tag_ls[] = "info=ls ";
+	static const char tag_ls_action[] = "info=ls_action ";
+	static const char tag_r[] = "info=r ";
+	static const char tag_lk[] = "info=lk ";
+	static const char tag_r_action[] = "info=r_action ";
+
+	/* only print this in the raw info dump */
+	if (!strncmp(line, tag_structs, sizeof(tag_structs) - 1))
+		return;
+
+	if (!strncmp(line, tag_client, sizeof(tag_client) - 1)) {
+		save_client_info(line);
+		return;
+	}
+
+	if (!strncmp(line, tag_ls, sizeof(tag_ls) - 1)) {
+		format_info_ls(line);
+		return;
+	}
+
+	if (!strncmp(line, tag_ls_action, sizeof(tag_ls_action) - 1)) {
+		format_info_ls_action(line);
+		return;
+	}
+
+	if (!strncmp(line, tag_r, sizeof(tag_r) - 1)) {
+		/* a new resource starts: forget the previous name and type */
+		memset(r_name, 0, MAX_NAME+1);
+		memset(r_type, 0, MAX_NAME+1);
+		format_info_r(line, r_name, r_type);
+		return;
+	}
+
+	/* lk and r_action lines use the info from the previous r line */
+	if (!strncmp(line, tag_lk, sizeof(tag_lk) - 1)) {
+		format_info_lk(line, r_name, r_type);
+		return;
+	}
+
+	if (!strncmp(line, tag_r_action, sizeof(tag_r_action) - 1)) {
+		format_info_r_action(line, r_name, r_type);
+		return;
+	}
+
+	printf("UN %s\n", line);
+}
+
+/*
+ * Split the first dump_len bytes of dump_buf into lines (terminated by a
+ * newline or NUL byte) and hand each one to format_info_line().
+ *
+ * Lines longer than MAX_LINE-1 bytes are truncated so line[] always stays
+ * NUL-terminated, and the scan never runs past dump_buf.  A final line
+ * that has no terminator is not formatted.
+ */
+static void format_info(void)
+{
+	char line[MAX_LINE];
+	/* Start empty so lk/action lines seen before any resource are rejected. */
+	char r_name[MAX_NAME+1] = { 0 };
+	char r_type[MAX_NAME+1] = { 0 };
+	int i, j;
+
+	j = 0;
+	memset(line, 0, sizeof(line));
+
+	for (i = 0; i < dump_len && i < DUMP_BUF_SIZE; i++) {
+		/* Drop the excess bytes of an over-long line instead of overflowing. */
+		if (j < MAX_LINE - 1)
+			line[j++] = dump_buf[i];
+
+		if ((dump_buf[i] == '\n') || (dump_buf[i] == '\0')) {
+			format_info_line(line, r_name, r_type);
+			j = 0;
+			memset(line, 0, sizeof(line));
+		}
+	}
+}
+
+
+/*
+ * Build a request named 'req_name', extend it with the variadic arguments
+ * that follow, send it over the _lvmlockd connection and return the reply.
+ * The request is destroyed here; the caller owns the returned reply.
+ */
+static daemon_reply _lvmlockd_send(const char *req_name, ...)
+{
+	daemon_request rq = daemon_request_make(req_name);
+	daemon_reply rp;
+	va_list args;
+
+	va_start(args, req_name);
+	daemon_request_extend_v(rq, args);
+	va_end(args);
+
+	rp = daemon_send(_lvmlockd, rq);
+	daemon_request_destroy(rq);
+
+	return rp;
+}
+
+/* See the same in lib/locking/lvmlockd.c */
+#define NO_LOCKD_RESULT -1000
+
+/*
+ * Extract the "op_result" value from an lvmlockd reply.
+ *
+ * Returns 1 and stores the value in *result when the reply carries no
+ * transport error, its "response" is "OK" and an op_result is present.
+ * Returns 0 otherwise, after printing the reason; *result is NOT written
+ * in that case, so callers must not read it unless they initialised it.
+ */
+static int _lvmlockd_result(daemon_reply reply, int *result)
+{
+	int reply_result;
+
+	if (reply.error) {
+		log_error("lvmlockd_result reply error %d", reply.error);
+		return 0;
+	}
+
+	if (strcmp(daemon_reply_str(reply, "response", ""), "OK")) {
+		log_error("lvmlockd_result bad response");
+		return 0;
+	}
+
+	/* NO_LOCKD_RESULT is the default, so getting it back means "absent". */
+	reply_result = daemon_reply_int(reply, "op_result", NO_LOCKD_RESULT);
+	if (reply_result == NO_LOCKD_RESULT) {
+		log_error("lvmlockd_result no op_result");
+		return 0;
+	}
+
+	*result = reply_result;
+
+	return 1;
+}
+
+/*
+ * Send the "quit" request to lvmlockd.  Returns 0 on success or the
+ * reply's error value, which is also printed.
+ */
+static int do_quit(void)
+{
+	daemon_reply reply = daemon_send_simple(_lvmlockd, "quit", NULL);
+	int status = reply.error;
+
+	if (status)
+		log_error("reply error %d", status);
+
+	daemon_reply_destroy(reply);
+	return status;
+}
+
+/*
+ * Create the datagram socket on which lvmlockd delivers dump data and bind
+ * it to DUMP_SOCKET_NAME.  The name is written starting at sun_path[1],
+ * leaving sun_path[0] as NUL (the Linux abstract-namespace address form,
+ * see unix(7)).  The bound address is kept in dump_addr / dump_addrlen.
+ *
+ * Returns the socket fd, or the negative value returned by socket()/bind()
+ * on failure.
+ */
+static int setup_dump_socket(void)
+{
+	int s, rv;
+
+	s = socket(AF_LOCAL, SOCK_DGRAM, 0);
+	if (s < 0)
+		return s;
+
+	memset(&dump_addr, 0, sizeof(dump_addr));
+	dump_addr.sun_family = AF_LOCAL;
+	strcpy(&dump_addr.sun_path[1], DUMP_SOCKET_NAME);
+	dump_addrlen = sizeof(sa_family_t) + strlen(dump_addr.sun_path+1) + 1;
+
+	rv = bind(s, (struct sockaddr *) &dump_addr, dump_addrlen);
+	if (rv < 0) {
+		/* close() returns 0 on success; only a non-zero result is a failure. */
+		if (close(s))
+			log_error("failed to close dump socket");
+		return rv;
+	}
+
+	return s;
+}
+
+/*
+ * Ask lvmlockd for a dump ('req_name' is "info" or "dump"), receive the
+ * data on the dump socket into dump_buf and print it: raw when both --info
+ * and --dump were given or the request is "dump", formatted otherwise.
+ *
+ * Returns 0 on success; otherwise the socket setup error, the reply error,
+ * a negative "result" from lvmlockd when no data follows, -EINVAL for a
+ * dump_len that does not fit dump_buf, or -errno from recvfrom().
+ */
+static int do_dump(const char *req_name)
+{
+	daemon_reply reply;
+	int result;
+	int fd, rv = 0;
+	int err;
+
+	fd = setup_dump_socket();
+	if (fd < 0) {
+		log_error("socket error %d", fd);
+		return fd;
+	}
+
+	reply = daemon_send_simple(_lvmlockd, req_name, NULL);
+
+	if (reply.error) {
+		log_error("reply error %d", reply.error);
+		rv = reply.error;
+		/* The reply must be released on the error path as well. */
+		daemon_reply_destroy(reply);
+		goto out;
+	}
+
+	result = daemon_reply_int(reply, "result", 0);
+	dump_len = daemon_reply_int(reply, "dump_len", 0);
+
+	daemon_reply_destroy(reply);
+
+	if (result < 0) {
+		rv = result;
+		log_error("result %d", result);
+	}
+
+	if (!dump_len)
+		goto out;
+
+	/* Never trust the reported length beyond what dump_buf can hold. */
+	if (dump_len < 0 || dump_len > DUMP_BUF_SIZE) {
+		log_error("invalid dump_len %d", dump_len);
+		dump_len = 0;
+		rv = -EINVAL;
+		goto out;
+	}
+
+	memset(dump_buf, 0, sizeof(dump_buf));
+
+	rv = recvfrom(fd, dump_buf, dump_len, MSG_WAITALL,
+		      (struct sockaddr *)&dump_addr, &dump_addrlen);
+	if (rv < 0) {
+		/* Save errno first: printing may overwrite it. */
+		err = errno;
+		log_error("recvfrom error %d %d", rv, err);
+		rv = -err;
+		goto out;
+	}
+
+	rv = 0;
+	if ((info && dump) || !strcmp(req_name, "dump"))
+		printf("%s\n", dump_buf);
+	else
+		format_info();
+out:
+	if (close(fd))
+		log_error("failed to close dump socket %d", fd);
+	return rv;
+}
+
+/*
+ * Send 'req_name' ("enable_gl" or "disable_gl") for the VG named on the
+ * command line.  Returns 0 when lvmlockd supplied an op_result, otherwise
+ * -1 (no usable result could be read from the reply).
+ * NOTE(review): the op_result value itself is not propagated - confirm
+ * that is intended.
+ */
+static int do_able(const char *req_name)
+{
+	daemon_reply reply;
+	/* _lvmlockd_result() leaves this untouched on failure; give it a defined value. */
+	int result = -1;
+	int rv;
+
+	reply = _lvmlockd_send(req_name,
+				"cmd = %s", "lvmlockctl",
+				"pid = %d", getpid(),
+				"vg_name = %s", arg_vg_name,
+				NULL);
+
+	if (!_lvmlockd_result(reply, &result)) {
+		log_error("lvmlockd result %d", result);
+		rv = result;
+	} else {
+		rv = 0;
+	}
+
+	daemon_reply_destroy(reply);
+	return rv;
+}
+
+/*
+ * Send "stop_all" to lvmlockd.  The "opts" field carries "wait " and/or
+ * "force " according to the command-line options, or "none" when neither
+ * is set.  Returns 0 when lvmlockd supplied an op_result, otherwise -1.
+ * NOTE(review): the op_result value itself is not propagated - confirm
+ * that is intended.
+ */
+static int do_stop_lockspaces(void)
+{
+	daemon_reply reply;
+	char opts[32];
+	/* _lvmlockd_result() leaves this untouched on failure; give it a defined value. */
+	int result = -1;
+	int rv;
+
+	memset(opts, 0, sizeof(opts));
+
+	/* At most "wait force " (11 bytes) is appended, well within opts[]. */
+	if (wait_opt)
+		strcat(opts, "wait ");
+	if (force_opt)
+		strcat(opts, "force ");
+
+	reply = _lvmlockd_send("stop_all",
+				"cmd = %s", "lvmlockctl",
+				"pid = %d", getpid(),
+				"opts = %s", opts[0] ? opts : "none",
+				NULL);
+
+	if (!_lvmlockd_result(reply, &result)) {
+		log_error("lvmlockd result %d", result);
+		rv = result;
+	} else {
+		rv = 0;
+	}
+
+	daemon_reply_destroy(reply);
+	return rv;
+}
+
+/*
+ * Report loss of sanlock lease storage for the VG named on the command
+ * line: log emergency messages telling the admin what to do, then send
+ * "kill_vg" to lvmlockd.  Returns 0 when lvmlockd supplied an op_result,
+ * otherwise -1.
+ */
+static int do_kill(void)
+{
+	daemon_reply reply;
+	/* _lvmlockd_result() leaves this untouched on failure; give it a defined value. */
+	int result = -1;
+	int rv;
+
+	syslog(LOG_EMERG, "Lost access to sanlock lease storage in VG %s.", arg_vg_name);
+	/* These two lines explain the manual alternative to the FIXME below. */
+	syslog(LOG_EMERG, "Immediately deactivate LVs in VG %s.", arg_vg_name);
+	syslog(LOG_EMERG, "Once VG is unused, run lvmlockctl --drop %s.", arg_vg_name);
+
+	/*
+	 * It may not be strictly necessary to notify lvmlockd of the kill, but
+	 * lvmlockd can use this information to avoid attempting any new lock
+	 * requests in the VG (which would fail anyway), and can return an
+	 * error indicating that the VG has been killed.
+	 */
+
+	reply = _lvmlockd_send("kill_vg",
+				"cmd = %s", "lvmlockctl",
+				"pid = %d", getpid(),
+				"vg_name = %s", arg_vg_name,
+				NULL);
+
+	if (!_lvmlockd_result(reply, &result)) {
+		log_error("lvmlockd result %d", result);
+		rv = result;
+	} else {
+		rv = 0;
+	}
+
+	daemon_reply_destroy(reply);
+
+	/*
+	 * FIXME: here is where we should implement a strong form of
+	 * blkdeactivate, and if it completes successfully, automatically call
+	 * do_drop() afterward.  (The drop step may not always be necessary
+	 * if the lvm commands run while shutting things down release all the
+	 * leases.)
+	 *
+	 * run_strong_blkdeactivate();
+	 * do_drop();
+	 */
+
+	return rv;
+}
+
+/*
+ * Tell lvmlockd to drop the locks of the VG named on the command line
+ * ("drop_vg"), logging a warning first.  Returns 0 when lvmlockd supplied
+ * an op_result, otherwise -1.
+ */
+static int do_drop(void)
+{
+	daemon_reply reply;
+	/* _lvmlockd_result() leaves this untouched on failure; give it a defined value. */
+	int result = -1;
+	int rv;
+
+	syslog(LOG_WARNING, "Dropping locks for VG %s.", arg_vg_name);
+
+	/*
+	 * Check for misuse by looking for any active LVs in the VG
+	 * and refusing this operation if found?  One possible way
+	 * to kill LVs (e.g. if fs cannot be unmounted) is to suspend
+	 * them, or replace them with the error target.  In that
+	 * case the LV will still appear to be active, but it is
+	 * safe to release the lock.
+	 */
+
+	reply = _lvmlockd_send("drop_vg",
+				"cmd = %s", "lvmlockctl",
+				"pid = %d", getpid(),
+				"vg_name = %s", arg_vg_name,
+				NULL);
+
+	if (!_lvmlockd_result(reply, &result)) {
+		log_error("lvmlockd result %d", result);
+		rv = result;
+	} else {
+		rv = 0;
+	}
+
+	daemon_reply_destroy(reply);
+	return rv;
+}
+
+/* Print the command-line help text to stdout. */
+static void print_usage(void)
+{
+	printf("lvmlockctl options\n");
+	printf("Options:\n");
+	printf("--help | -h\n");
+	printf("      Show this help information.\n");
+	printf("--quit | -q\n");
+	printf("      Tell lvmlockd to quit.\n");
+	printf("--info | -i\n");
+	printf("      Print lock state information from lvmlockd.\n");
+	printf("--dump | -d\n");
+	printf("      Print log buffer from lvmlockd.\n");
+	printf("--wait | -w 0|1\n");
+	printf("      Wait option for other commands.\n");
+	printf("--force | -f 0|1\n");
+	printf("      Force option for other commands.\n");
+	printf("--kill | -k <vg_name>\n");
+	printf("      Kill access to the vg when sanlock cannot renew lease.\n");
+	printf("--drop | -r <vg_name>\n");
+	printf("      Clear locks for the vg after it has been killed and is no longer used.\n");
+	printf("--gl-enable <vg_name>\n");
+	printf("      Tell lvmlockd to enable the global lock in a sanlock vg.\n");
+	printf("--gl-disable <vg_name>\n");
+	printf("      Tell lvmlockd to disable the global lock in a sanlock vg.\n");
+	printf("--stop-lockspaces | -S\n");
+	printf("      Stop all lockspaces.\n");
+}
+
+/*
+ * Parse the command line into the file-level option variables.
+ *
+ * With no arguments, or with --help, the usage text is printed and the
+ * program exits 0; an unrecognised option prints the usage and exits 1.
+ * Otherwise returns 0.  When several options taking a VG name are given,
+ * the last one determines arg_vg_name.
+ */
+static int read_options(int argc, char *argv[])
+{
+	int option_index = 0;
+	int c;
+
+	static struct option long_options[] = {
+		{"help",            no_argument,       0,  'h' },
+		{"quit",            no_argument,       0,  'q' },
+		{"info",            no_argument,       0,  'i' },
+		{"dump",            no_argument,       0,  'd' },
+		{"wait",            required_argument, 0,  'w' },
+		{"force",           required_argument, 0,  'f' },
+		{"kill",            required_argument, 0,  'k' },
+		{"drop",            required_argument, 0,  'r' },
+		{"gl-enable",       required_argument, 0,  'E' },
+		{"gl-disable",      required_argument, 0,  'D' },
+		{"stop-lockspaces", no_argument,       0,  'S' },
+		{0, 0, 0, 0 }
+	};
+
+	if (argc == 1) {
+		print_usage();
+		exit(0);
+	}
+
+	while (1) {
+		/* "f:" keeps the short option in step with --force above. */
+		c = getopt_long(argc, argv, "hqidE:D:w:f:k:r:S", long_options, &option_index);
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 'h':
+			/* --help */
+			print_usage();
+			exit(0);
+		case 'q':
+			/* --quit */
+			quit = 1;
+			break;
+		case 'i':
+			/* --info */
+			info = 1;
+			break;
+		case 'd':
+			/* --dump */
+			dump = 1;
+			break;
+		case 'w':
+			/* --wait */
+			wait_opt = atoi(optarg);
+			break;
+		case 'f':
+			/* --force: previously fell through to the usage error */
+			force_opt = atoi(optarg);
+			break;
+		case 'k':
+			/* --kill */
+			kill_vg = 1;
+			arg_vg_name = strdup(optarg);
+			break;
+		case 'r':
+			/* --drop */
+			drop_vg = 1;
+			arg_vg_name = strdup(optarg);
+			break;
+		case 'E':
+			/* --gl-enable */
+			gl_enable = 1;
+			arg_vg_name = strdup(optarg);
+			break;
+		case 'D':
+			/* --gl-disable */
+			gl_disable = 1;
+			arg_vg_name = strdup(optarg);
+			break;
+		case 'S':
+			/* --stop-lockspaces */
+			stop_lockspaces = 1;
+			break;
+		default:
+			print_usage();
+			exit(1);
+		}
+	}
+
+
+	return 0;
+}
+
+/*
+ * Entry point: parse the options, connect to lvmlockd and run the first
+ * requested action in this fixed priority order: quit, info, dump, kill,
+ * drop, gl-enable, gl-disable, stop-lockspaces.  Returns that action's
+ * result, or -1 when the daemon cannot be reached.
+ */
+int main(int argc, char **argv)
+{
+	int status;
+
+	status = read_options(argc, argv);
+	if (status < 0)
+		return status;
+
+	_lvmlockd = lvmlockd_open(NULL);
+
+	if (_lvmlockd.error || _lvmlockd.socket_fd < 0) {
+		log_error("Cannot connect to lvmlockd.");
+		return -1;
+	}
+
+	/* Only one action runs per invocation, even if several were requested. */
+	if (quit)
+		status = do_quit();
+	else if (info)
+		status = do_dump("info");
+	else if (dump)
+		status = do_dump("dump");
+	else if (kill_vg)
+		status = do_kill();
+	else if (drop_vg)
+		status = do_drop();
+	else if (gl_enable)
+		status = do_able("enable_gl");
+	else if (gl_disable)
+		status = do_able("disable_gl");
+	else if (stop_lockspaces)
+		status = do_stop_lockspaces();
+
+	lvmlockd_close(_lvmlockd);
+	return status;
+}
--- a/daemons/lvmlockd/lvmlockd-client.h
+++ b/daemons/lvmlockd/lvmlockd-client.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2014-2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ */
+
+#ifndef _LVM_LVMLOCKD_CLIENT_H
+#define _LVM_LVMLOCKD_CLIENT_H
+
+#include "daemon-client.h"
+
+#define LVMLOCKD_SOCKET DEFAULT_RUN_DIR "/lvmlockd.socket"
+
+/* Wrappers to open/close connection */
+
+/*
+ * Open a client connection to lvmlockd on 'sock', or on LVMLOCKD_SOCKET
+ * when 'sock' is NULL.  Protocol "lvmlockd" version 1 is requested and
+ * autostart is disabled.  Returns whatever daemon_open() returns.
+ */
+static inline daemon_handle lvmlockd_open(const char *sock)
+{
+	daemon_info lvmlockd_info = { 0 };
+
+	lvmlockd_info.path = "lvmlockd";
+	lvmlockd_info.socket = sock ? sock : LVMLOCKD_SOCKET;
+	lvmlockd_info.protocol = "lvmlockd";
+	lvmlockd_info.protocol_version = 1;
+	lvmlockd_info.autostart = 0;
+
+	return daemon_open(lvmlockd_info);
+}
+
+/* Close a connection previously opened with lvmlockd_open(). */
+static inline void lvmlockd_close(daemon_handle h)
+{
+	/* A void function must not return an expression (ISO C). */
+	daemon_close(h);
+}
+
+/*
+ * Errors returned as the lvmlockd result value.
+ */
+#define ENOLS     210 /* lockspace not found */
+#define ESTARTING 211 /* lockspace is starting */
+#define EARGS     212
+#define EHOSTID   213
+#define EMANAGER  214
+#define EPREPARE  215
+#define ELOCKD    216
+#define EVGKILLED 217 /* sanlock lost access to leases and VG is killed. */
+#define ELOCKIO   218 /* sanlock io errors during lock op, may be transient. */
+
+#endif	/* _LVM_LVMLOCKD_CLIENT_H */
--- a/daemons/lvmlockd/lvmlockd-core.c
+++ b/daemons/lvmlockd/lvmlockd-core.c
--- a/daemons/lvmlockd/lvmlockd-dlm.c
+++ b/daemons/lvmlockd/lvmlockd-dlm.c
@@ -0,0 +1,662 @@
+/*
+ * Copyright (C) 2014-2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ */
+
+#define _XOPEN_SOURCE 500  /* pthread */
+#define _ISOC99_SOURCE
+
+#include "tool.h"
+
+#include "daemon-server.h"
+#include "daemon-log.h"
+#include "xlate.h"
+
+#include "lvmlockd-internal.h"
+#include "lvmlockd-client.h"
+
+/*
+ * Using synchronous _wait dlm apis so do not define _REENTRANT and
+ * link with non-threaded version of library, libdlm_lt.
+ */
+#include "libdlm.h"
+
+#include <pthread.h>
+#include <stddef.h>
+#include <poll.h>
+#include <errno.h>
+#include <endian.h>
+#include <fcntl.h>
+#include <byteswap.h>
+#include <syslog.h>
+#include <dirent.h>
+#include <sys/socket.h>
+
+/* Per-lockspace dlm state, stored in lockspace.lm_data. */
+struct lm_dlm {
+	dlm_lshandle_t *dh;	/* handle from dlm_new_lockspace()/dlm_open_lockspace() */
+};
+
+/* Per-resource dlm state, overlaid on resource.lm_data. */
+struct rd_dlm {
+	struct dlm_lksb lksb;	/* dlm lock status block passed to the dlm lock calls */
+	struct val_blk *vb;	/* local little-endian copy of the value block; only allocated for GL and VG resources */
+};
+
+/* Size in bytes of the dlm-specific data (struct rd_dlm) kept in resource.lm_data. */
+int lm_data_size_dlm(void)
+{
+	const size_t rd_size = sizeof(struct rd_dlm);
+
+	return rd_size;
+}
+
+/*
+ * lock_args format
+ *
+ * vg_lock_args format for dlm is
+ * vg_version_string:undefined:cluster_name
+ *
+ * lv_lock_args are not used for dlm
+ *
+ * version_string is MAJOR.MINOR.PATCH
+ * undefined may contain ":"
+ */
+
+#define VG_LOCK_ARGS_MAJOR 1
+#define VG_LOCK_ARGS_MINOR 0
+#define VG_LOCK_ARGS_PATCH 0
+
+/*
+ * Extract the cluster name from vg_args into clustername by delegating to
+ * last_string_from_args(); in the dlm vg_lock_args format the cluster name
+ * is the last field.  Returns the result of last_string_from_args()
+ * (callers treat a negative value as failure).
+ */
+static int cluster_name_from_args(char *vg_args, char *clustername)
+{
+	return last_string_from_args(vg_args, clustername);
+}
+
+/*
+ * Verify that the lock_args version recorded in vg_args is one this code
+ * understands.
+ *
+ * Returns 0 if the major version is not newer than VG_LOCK_ARGS_MAJOR,
+ * the negative result of version_from_args() if the version cannot be
+ * parsed, or -1 if the major version is too new.
+ */
+static int check_args_version(char *vg_args)
+{
+	unsigned int major = 0;
+	int rv;
+
+	rv = version_from_args(vg_args, &major, NULL, NULL);
+	if (rv < 0) {
+		log_error("check_args_version %s error %d", vg_args, rv);
+		return rv;
+	}
+
+	if (major > VG_LOCK_ARGS_MAJOR) {
+		/* major is unsigned, so print it (and the limit) with %u */
+		log_error("check_args_version %s major %u %u", vg_args, major,
+			  (unsigned int) VG_LOCK_ARGS_MAJOR);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* This will be set after dlm_controld is started. */
+#define DLM_CLUSTER_NAME_PATH "/sys/kernel/config/dlm/cluster/cluster_name"
+
+/*
+ * Read the dlm cluster name from DLM_CLUSTER_NAME_PATH into clustername.
+ *
+ * clustername must have room for at least MAX_ARGS+1 bytes; the result is
+ * always NUL terminated and a trailing newline is stripped.  In daemon_test
+ * mode the fixed name "test" is returned without opening the file.
+ *
+ * Returns 0 on success, or a negative value if the file cannot be opened
+ * or read.
+ */
+static int read_cluster_name(char *clustername)
+{
+	static const char close_error_msg[] = "read_cluster_name: close_error %d";
+	char *n;
+	int fd;
+	int rv;
+
+	if (daemon_test) {
+		sprintf(clustername, "%s", "test");
+		return 0;
+	}
+
+	fd = open(DLM_CLUSTER_NAME_PATH, O_RDONLY);
+	if (fd < 0) {
+		/* fd is always -1 here; errno says why the open failed */
+		log_debug("read_cluster_name: open error %d, check dlm_controld", errno);
+		return fd;
+	}
+
+	rv = read(fd, clustername, MAX_ARGS);
+	if (rv < 0) {
+		/* report errno rather than the (valid) descriptor number */
+		log_error("read_cluster_name: cluster name read error %d, check dlm_controld", errno);
+		if (close(fd))
+			log_error(close_error_msg, errno);
+		return rv;
+	}
+
+	/* read() does not terminate the buffer; do not rely on the caller having zeroed it */
+	clustername[rv] = '\0';
+
+	n = strchr(clustername, '\n');
+	if (n)
+		*n = '\0';
+	if (close(fd))
+		log_error(close_error_msg, errno);
+	return 0;
+}
+
+/*
+ * Build the vg lock_args string for a new dlm VG.
+ *
+ * Writes "<MAJOR.MINOR.PATCH>:<cluster name>" into vg_args (at most MAX_ARGS
+ * bytes including the terminator).  vg_name and flags are not used here.
+ *
+ * Returns 0 on success, -EMANAGER if the cluster name cannot be read, or
+ * -EARGS if the combined string would not fit.
+ */
+int lm_init_vg_dlm(char *ls_name, char *vg_name, uint32_t flags, char *vg_args)
+{
+	char version_str[MAX_ARGS+1] = { 0 };
+	char cluster[MAX_ARGS+1] = { 0 };
+	size_t needed;
+
+	snprintf(version_str, MAX_ARGS, "%u.%u.%u",
+		 VG_LOCK_ARGS_MAJOR, VG_LOCK_ARGS_MINOR, VG_LOCK_ARGS_PATCH);
+
+	if (read_cluster_name(cluster) < 0)
+		return -EMANAGER;
+
+	needed = strlen(cluster) + strlen(version_str) + 2;
+	if (needed > MAX_ARGS) {
+		log_error("init_vg_dlm args too long");
+		return -EARGS;
+	}
+
+	snprintf(vg_args, MAX_ARGS, "%s:%s", version_str, cluster);
+
+	log_debug("init_vg_dlm done %s vg_args %s", ls_name, vg_args);
+	return 0;
+}
+
+/*
+ * Validate a lockspace's vg_args against the running dlm cluster and
+ * allocate its struct lm_dlm.
+ *
+ * A lockspace with empty vg_args (the global lockspace) skips validation.
+ * On success ls->lm_data points at a newly malloc'ed struct lm_dlm whose
+ * handle has not been set yet.
+ *
+ * Returns 0 on success, -EMANAGER if the cluster name cannot be read,
+ * -EARGS on a version or cluster name problem, -ENOMEM on allocation failure.
+ */
+int lm_prepare_lockspace_dlm(struct lockspace *ls)
+{
+	char running_cluster[MAX_ARGS+1] = { 0 };
+	char wanted_cluster[MAX_ARGS+1] = { 0 };
+	struct lm_dlm *lmd;
+
+	if (read_cluster_name(running_cluster) < 0)
+		return -EMANAGER;
+
+	/* global lockspace has no vg args, so there is nothing to check */
+	if (ls->vg_args[0]) {
+		if (check_args_version(ls->vg_args) < 0)
+			return -EARGS;
+
+		if (cluster_name_from_args(ls->vg_args, wanted_cluster) < 0) {
+			log_error("prepare_lockspace_dlm %s no cluster name from args %s", ls->name, ls->vg_args);
+			return -EARGS;
+		}
+
+		if (strcmp(running_cluster, wanted_cluster) != 0) {
+			log_error("prepare_lockspace_dlm %s mismatching cluster names sys %s arg %s",
+				  ls->name, running_cluster, wanted_cluster);
+			return -EARGS;
+		}
+	}
+
+	lmd = malloc(sizeof(*lmd));
+	if (!lmd)
+		return -ENOMEM;
+
+	ls->lm_data = lmd;
+	return 0;
+}
+
+/*
+ * Join the dlm lockspace named ls->name and store the handle in the
+ * struct lm_dlm found in ls->lm_data.
+ *
+ * With adopt set an existing lockspace is opened; otherwise a new one is
+ * created with DLM_LSFL_NEWEXCL.  In daemon_test mode nothing is done.
+ *
+ * Returns 0 on success.  On failure returns -1 after freeing the
+ * struct lm_dlm and clearing ls->lm_data.
+ */
+int lm_add_lockspace_dlm(struct lockspace *ls, int adopt)
+{
+	struct lm_dlm *lmd = (struct lm_dlm *)ls->lm_data;
+	dlm_lshandle_t *handle;
+
+	if (daemon_test)
+		return 0;
+
+	handle = adopt ? dlm_open_lockspace(ls->name)
+		       : dlm_new_lockspace(ls->name, 0600, DLM_LSFL_NEWEXCL);
+	lmd->dh = handle;
+
+	if (handle)
+		return 0;
+
+	log_error("add_lockspace_dlm %s adopt %d error", ls->name, adopt);
+	free(lmd);
+	ls->lm_data = NULL;
+	return -1;
+}
+
+/*
+ * Leave the dlm lockspace and release the lockspace's struct lm_dlm.
+ *
+ * free_vg is currently not acted upon (see the note in the body).  In
+ * daemon_test mode the dlm call is skipped.  If ls is the dlm global
+ * lockspace (its name matches gl_lsname_dlm), gl_running_dlm is cleared.
+ *
+ * Returns 0 on success, or the negative dlm_release_lockspace() result, in
+ * which case ls->lm_data is left untouched.
+ */
+int lm_rem_lockspace_dlm(struct lockspace *ls, int free_vg)
+{
+	struct lm_dlm *lmd = (struct lm_dlm *)ls->lm_data;
+	int rv;
+
+	if (daemon_test)
+		goto out;
+
+	/*
+	 * If free_vg is set, it means we are doing vgremove, and we may want
+	 * to tell any other nodes to leave the lockspace.  This is not really
+	 * necessary since there should be no harm in having an unused
+	 * lockspace sitting around.  A new "notification lock" would need to
+	 * be added with a callback to signal this.
+	 */
+
+	rv = dlm_release_lockspace(ls->name, lmd->dh, 1);
+	if (rv < 0) {
+		log_error("rem_lockspace_dlm error %d", rv);
+		return rv;
+	}
+ out:
+	free(lmd);
+	ls->lm_data = NULL;
+
+	/* the dlm global lockspace is gone, so the dlm global lock is no longer running */
+	if (!strcmp(ls->name, gl_lsname_dlm))
+		gl_running_dlm = 0;
+
+	return 0;
+}
+
+/*
+ * Initialize the dlm data for resource r and, if with_lock_nl is set,
+ * acquire the initial NL (null mode) lock on it.
+ *
+ * GL and VG resources get a value block: a single allocation holds the
+ * local struct val_blk copy (rdd->vb) followed by the DLM_LVB_LEN byte
+ * buffer handed to dlm (lksb.sb_lvbptr).
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or the negative
+ * dlm_ls_lock_wait() result.  On failure no value block is left allocated,
+ * so a later retry starts from a clean state.
+ */
+static int lm_add_resource_dlm(struct lockspace *ls, struct resource *r, int with_lock_nl)
+{
+	struct lm_dlm *lmd = (struct lm_dlm *)ls->lm_data;
+	struct rd_dlm *rdd = (struct rd_dlm *)r->lm_data;
+	uint32_t flags = 0;
+	char *buf;
+	int rv;
+
+	if (r->type == LD_RT_GL || r->type == LD_RT_VG) {
+		buf = calloc(1, sizeof(struct val_blk) + DLM_LVB_LEN);
+		if (!buf)
+			return -ENOMEM;
+
+		rdd->vb = (struct val_blk *)buf;
+		rdd->lksb.sb_lvbptr = buf + sizeof(struct val_blk);
+
+		flags |= LKF_VALBLK;
+	}
+
+	if (!with_lock_nl)
+		goto out;
+
+	/* because this is a new NL lock request */
+	flags |= LKF_EXPEDITE;
+
+	if (daemon_test)
+		goto out;
+
+	rv = dlm_ls_lock_wait(lmd->dh, LKM_NLMODE, &rdd->lksb, flags,
+			      r->name, strlen(r->name),
+			      0, NULL, NULL, NULL);
+	if (rv < 0) {
+		log_error("S %s R %s add_resource_dlm lock error %d", ls->name, r->name, rv);
+		/*
+		 * The caller leaves r->lm_init clear on failure and calls us
+		 * again on the next attempt, so release the value block now
+		 * instead of leaking it when a new one is allocated.
+		 */
+		if (rdd->vb) {
+			free(rdd->vb);
+			rdd->vb = NULL;
+			rdd->lksb.sb_lvbptr = NULL;
+		}
+		return rv;
+	}
+ out:
+	return 0;
+}
+
+/*
+ * Drop the dlm lock held for resource r (if any) and reset its dlm data.
+ *
+ * The unlock is skipped in daemon_test mode or when the lock status block
+ * holds no lock id.  The value block is freed, struct rd_dlm is zeroed and
+ * r->lm_init is cleared regardless of the unlock result.
+ *
+ * Returns 0, or the negative dlm_ls_unlock_wait() result.
+ */
+int lm_rem_resource_dlm(struct lockspace *ls, struct resource *r)
+{
+	struct lm_dlm *lmd = (struct lm_dlm *)ls->lm_data;
+	struct rd_dlm *rdd = (struct rd_dlm *)r->lm_data;
+	struct dlm_lksb *lksb = &rdd->lksb;
+	int rv = 0;
+
+	if (!daemon_test && lksb->sb_lkid) {
+		rv = dlm_ls_unlock_wait(lmd->dh, lksb->sb_lkid, 0, lksb);
+		if (rv < 0)
+			log_error("S %s R %s rem_resource_dlm unlock error %d", ls->name, r->name, rv);
+	}
+
+	/* free(NULL) is a no-op, so no explicit check is needed */
+	free(rdd->vb);
+
+	memset(rdd, 0, sizeof(*rdd));
+	r->lm_init = 0;
+	return rv;
+}
+
+/*
+ * Translate an lvmlockd lock mode into the dlm mode used for it:
+ * LD_LK_EX -> LKM_EXMODE, LD_LK_SH -> LKM_PRMODE.  Any other mode yields -1.
+ */
+static int to_dlm_mode(int ld_mode)
+{
+	if (ld_mode == LD_LK_EX)
+		return LKM_EXMODE;
+	if (ld_mode == LD_LK_SH)
+		return LKM_PRMODE;
+	return -1;
+}
+
+/*
+ * Adopt an existing orphan dlm lock on resource r in mode ld_mode.
+ *
+ * *r_version is always set to 0.  The resource's dlm data is initialized
+ * first if needed (without taking an NL lock).
+ *
+ * Returns 0 on success (and unconditionally in daemon_test mode once the
+ * mode has been validated), -EINVAL for an unsupported ld_mode, -EUCLEAN
+ * if dlm reports -EAGAIN (an orphan exists in another mode), or the
+ * negative dlm result otherwise.  On those three failures the resource's
+ * dlm data is torn down again via lm_rem_resource_dlm().
+ */
+static int lm_adopt_dlm(struct lockspace *ls, struct resource *r, int ld_mode,
+			uint32_t *r_version)
+{
+	struct lm_dlm *lmd = (struct lm_dlm *)ls->lm_data;
+	struct rd_dlm *rdd = (struct rd_dlm *)r->lm_data;
+	struct dlm_lksb *lksb;
+	uint32_t flags = 0;
+	int mode;
+	int rv;
+
+	*r_version = 0;
+
+	if (!r->lm_init) {
+		/* 0: set up the value block only, do not request an NL lock */
+		rv = lm_add_resource_dlm(ls, r, 0);
+		if (rv < 0)
+			return rv;
+		r->lm_init = 1;
+	}
+
+	lksb = &rdd->lksb;
+
+	flags |= LKF_PERSISTENT;
+	flags |= LKF_ORPHAN;
+
+	if (rdd->vb)
+		flags |= LKF_VALBLK;
+
+	mode = to_dlm_mode(ld_mode);
+	if (mode < 0) {
+		log_error("adopt_dlm invalid mode %d", ld_mode);
+		rv = -EINVAL;
+		goto fail;
+	}
+
+	log_debug("S %s R %s adopt_dlm", ls->name, r->name);
+
+	if (daemon_test)
+		return 0;
+
+	/*
+	 * dlm returns 0 for success, -EAGAIN if an orphan is
+	 * found with another mode, and -ENOENT if no orphan.
+	 *
+	 * cast/bast/param are (void *)1 because the kernel
+	 * returns errors if some are null.
+	 */
+
+	rv = dlm_ls_lockx(lmd->dh, mode, lksb, flags,
+			  r->name, strlen(r->name), 0,
+			  (void *)1, (void *)1, (void *)1,
+			  NULL, NULL);
+
+	if (rv == -EAGAIN) {
+		log_debug("S %s R %s adopt_dlm adopt mode %d try other mode",
+			  ls->name, r->name, ld_mode);
+		/* distinct code so the caller can tell "wrong mode" from other errors */
+		rv = -EUCLEAN;
+		goto fail;
+	}
+	if (rv < 0) {
+		log_debug("S %s R %s adopt_dlm mode %d flags %x error %d errno %d",
+			  ls->name, r->name, mode, flags, rv, errno);
+		goto fail;
+	}
+
+	/*
+	 * FIXME: For GL/VG locks we probably want to read the lvb,
+	 * especially if adopting an ex lock, because when we
+	 * release this adopted ex lock we may want to write new
+	 * lvb values based on the current lvb values (at least
+	 * in the GL case where we increment the current values.)
+	 *
+	 * It should be possible to read the lvb by requesting
+	 * this lock in the same mode it's already in.
+	 */
+
+	return rv;
+
+ fail:
+	lm_rem_resource_dlm(ls, r);
+	return rv;
+}
+
+/*
+ * Use PERSISTENT so that if lvmlockd exits while holding locks,
+ * the locks will remain orphaned in the dlm, still protecting what
+ * they were acquired to protect.
+ */
+
+/*
+ * Acquire the dlm lock for resource r in ld_mode.
+ *
+ * With adopt set the request is handed to lm_adopt_dlm().  Otherwise the
+ * lock is created in NL mode on first use of the resource and then
+ * converted to the requested mode with LKF_NOQUEUE.
+ *
+ * For resources with a value block, *r_version is set from the lvb
+ * contents (0 if the lvb is flagged invalid or carries a newer major
+ * version than VAL_BLK_VERSION; in the latter case the value block is
+ * freed and no longer used).
+ * NOTE(review): on the non-test success path *r_version is not written
+ * for resources without a value block -- presumably callers initialize
+ * it; confirm.
+ *
+ * Returns 0 on success, -EINVAL for an unsupported mode, -EAGAIN when dlm
+ * reports -EAGAIN, or another negative dlm error.
+ */
+int lm_lock_dlm(struct lockspace *ls, struct resource *r, int ld_mode,
+		uint32_t *r_version, int adopt)
+{
+	struct lm_dlm *lmd = (struct lm_dlm *)ls->lm_data;
+	struct rd_dlm *rdd = (struct rd_dlm *)r->lm_data;
+	struct dlm_lksb *lksb;
+	struct val_blk vb;
+	uint32_t flags = 0;
+	uint16_t vb_version;
+	int mode;
+	int rv;
+
+	if (adopt) {
+		/* When adopting, we don't follow the normal method
+		   of acquiring a NL lock then converting it to the
+		   desired mode. */
+		return lm_adopt_dlm(ls, r, ld_mode, r_version);
+	}
+
+	if (!r->lm_init) {
+		/* 1: also acquire the initial NL lock that is converted below */
+		rv = lm_add_resource_dlm(ls, r, 1);
+		if (rv < 0)
+			return rv;
+		r->lm_init = 1;
+	}
+
+	lksb = &rdd->lksb;
+
+	flags |= LKF_CONVERT;
+	flags |= LKF_NOQUEUE;
+	flags |= LKF_PERSISTENT;
+
+	if (rdd->vb)
+		flags |= LKF_VALBLK;
+
+	mode = to_dlm_mode(ld_mode);
+	if (mode < 0) {
+		log_error("lock_dlm invalid mode %d", ld_mode);
+		return -EINVAL;
+	}
+
+	log_debug("S %s R %s lock_dlm", ls->name, r->name);
+
+	if (daemon_test) {
+		*r_version = 0;
+		return 0;
+	}
+
+	rv = dlm_ls_lock_wait(lmd->dh, mode, lksb, flags,
+			      r->name, strlen(r->name),
+			      0, NULL, NULL, NULL);
+	if (rv == -EAGAIN) {
+		log_error("S %s R %s lock_dlm mode %d rv EAGAIN", ls->name, r->name, mode);
+		return -EAGAIN;
+	}
+	if (rv < 0) {
+		log_error("S %s R %s lock_dlm error %d", ls->name, r->name, rv);
+		return rv;
+	}
+
+	if (rdd->vb) {
+		if (lksb->sb_flags & DLM_SBF_VALNOTVALID) {
+			/* dlm flagged the lvb as not valid: start over from a zeroed value block */
+			log_debug("S %s R %s lock_dlm VALNOTVALID", ls->name, r->name);
+			memset(rdd->vb, 0, sizeof(struct val_blk));
+			*r_version = 0;
+			goto out;
+		}
+
+		memcpy(&vb, lksb->sb_lvbptr, sizeof(struct val_blk));
+		vb_version = le16_to_cpu(vb.version);
+
+		/* only the high byte (major version) decides whether the lvb is usable */
+		if (vb_version && ((vb_version & 0xFF00) > (VAL_BLK_VERSION & 0xFF00))) {
+			log_error("S %s R %s lock_dlm ignore vb_version %x",
+				  ls->name, r->name, vb_version);
+			*r_version = 0;
+			/* rdd->vb is the start of the single allocation that sb_lvbptr points into */
+			free(rdd->vb);
+			rdd->vb = NULL;
+			lksb->sb_lvbptr = NULL;
+			goto out;
+		}
+
+		*r_version = le32_to_cpu(vb.r_version);
+		memcpy(rdd->vb, &vb, sizeof(vb)); /* rdd->vb saved as le */
+
+		log_debug("S %s R %s lock_dlm get r_version %u",
+			  ls->name, r->name, *r_version);
+	}
+out:
+	return 0;
+}
+
+/*
+ * Convert the dlm lock already held on resource r to ld_mode.
+ *
+ * If the resource has a value block, r_version is non-zero and the lock
+ * is currently held EX (r->mode), the new r_version is written into the
+ * lvb as part of the conversion.
+ *
+ * Returns 0 on success (and in daemon_test mode), -EINVAL for an
+ * unsupported ld_mode, -EAGAIN when dlm reports -EAGAIN, or another
+ * negative dlm error.
+ */
+int lm_convert_dlm(struct lockspace *ls, struct resource *r,
+		   int ld_mode, uint32_t r_version)
+{
+	struct lm_dlm *lmd = (struct lm_dlm *)ls->lm_data;
+	struct rd_dlm *rdd = (struct rd_dlm *)r->lm_data;
+	struct dlm_lksb *lksb = &rdd->lksb;
+	uint32_t flags = 0;
+	int mode;
+	int rv;
+
+	log_debug("S %s R %s convert_dlm", ls->name, r->name);
+
+	/*
+	 * Validate the mode before touching the value block, as lm_lock_dlm
+	 * does; to_dlm_mode() returns -1 for modes it cannot translate and
+	 * that must not be passed on to dlm.
+	 */
+	mode = to_dlm_mode(ld_mode);
+	if (mode < 0) {
+		log_error("convert_dlm invalid mode %d", ld_mode);
+		return -EINVAL;
+	}
+
+	flags |= LKF_CONVERT;
+	flags |= LKF_NOQUEUE;
+	flags |= LKF_PERSISTENT;
+
+	if (rdd->vb && r_version && (r->mode == LD_LK_EX)) {
+		if (!rdd->vb->version) {
+			/* first time vb has been written */
+			rdd->vb->version = cpu_to_le16(VAL_BLK_VERSION);
+		}
+		rdd->vb->r_version = cpu_to_le32(r_version);
+		memcpy(lksb->sb_lvbptr, rdd->vb, sizeof(struct val_blk));
+
+		log_debug("S %s R %s convert_dlm set r_version %u",
+			  ls->name, r->name, r_version);
+
+		flags |= LKF_VALBLK;
+	}
+
+	if (daemon_test)
+		return 0;
+
+	rv = dlm_ls_lock_wait(lmd->dh, mode, lksb, flags,
+			      r->name, strlen(r->name),
+			      0, NULL, NULL, NULL);
+	if (rv == -EAGAIN) {
+		/* FIXME: When does this happen?  Should something different be done? */
+		log_error("S %s R %s convert_dlm mode %d rv EAGAIN", ls->name, r->name, mode);
+		return -EAGAIN;
+	}
+	if (rv < 0) {
+		log_error("S %s R %s convert_dlm error %d", ls->name, r->name, rv);
+	}
+	return rv;
+}
+
+/*
+ * Release the lock on resource r by converting it down to NL mode.
+ *
+ * If the resource has a value block, r_version is non-zero and the lock
+ * is currently held EX (r->mode), the new r_version is written into the
+ * lvb as part of the down-conversion.  lmuf_flags is only logged here.
+ *
+ * Returns 0 on success (and in daemon_test mode) or the negative
+ * dlm_ls_lock_wait() result.
+ */
+int lm_unlock_dlm(struct lockspace *ls, struct resource *r,
+		  uint32_t r_version, uint32_t lmuf_flags)
+{
+	struct lm_dlm *lmd = (struct lm_dlm *)ls->lm_data;
+	struct rd_dlm *rdd = (struct rd_dlm *)r->lm_data;
+	struct dlm_lksb *lksb = &rdd->lksb;
+	uint32_t flags = 0;
+	int rv;
+
+	log_debug("S %s R %s unlock_dlm r_version %u flags %x",
+		  ls->name, r->name, r_version, lmuf_flags);
+
+	/*
+	 * Do not set PERSISTENT, because we don't need an orphan
+	 * NL lock to protect anything.
+	 */
+
+	flags |= LKF_CONVERT;
+
+	if (rdd->vb && r_version && (r->mode == LD_LK_EX)) {
+		if (!rdd->vb->version) {
+			/* first time vb has been written */
+			rdd->vb->version = cpu_to_le16(VAL_BLK_VERSION);
+		}
+		/* r_version is known to be non-zero here (checked above) */
+		rdd->vb->r_version = cpu_to_le32(r_version);
+		memcpy(lksb->sb_lvbptr, rdd->vb, sizeof(struct val_blk));
+
+		log_debug("S %s R %s unlock_dlm set r_version %u",
+			  ls->name, r->name, r_version);
+
+		flags |= LKF_VALBLK;
+	}
+
+	if (daemon_test)
+		return 0;
+
+	rv = dlm_ls_lock_wait(lmd->dh, LKM_NLMODE, lksb, flags,
+			      r->name, strlen(r->name),
+			      0, NULL, NULL, NULL);
+	if (rv < 0) {
+		log_error("S %s R %s unlock_dlm error %d", ls->name, r->name, rv);
+	}
+
+	return rv;
+}
+
+/*
+ * This list could be read from dlm_controld via libdlmcontrol,
+ * but it's simpler to get it from sysfs.
+ */
+
+#define DLM_LOCKSPACES_PATH "/sys/kernel/config/dlm/cluster/spaces"
+
+/*
+ * Collect the lvm lockspaces currently listed under DLM_LOCKSPACES_PATH.
+ *
+ * For every directory entry whose name starts with LVM_LS_PREFIX a new
+ * struct lockspace is allocated, marked LD_LM_DLM, given the entry name as
+ * its name and the part after the prefix as its vg_name, and appended to
+ * ls_rejoin.
+ *
+ * Returns 0 on success, -ECONNREFUSED if the directory cannot be opened,
+ * or -ENOMEM if a lockspace cannot be allocated (entries already appended
+ * to ls_rejoin are left on the list).
+ */
+int lm_get_lockspaces_dlm(struct list_head *ls_rejoin)
+{
+	static const char closedir_err_msg[] = "lm_get_lockspace_dlm: closedir failed";
+	struct lockspace *ls;
+	struct dirent *de;
+	DIR *ls_dir;
+
+	if (!(ls_dir = opendir(DLM_LOCKSPACES_PATH)))
+		return -ECONNREFUSED;
+
+	while ((de = readdir(ls_dir))) {
+		if (de->d_name[0] == '.')
+			continue;
+
+		if (strncmp(de->d_name, LVM_LS_PREFIX, strlen(LVM_LS_PREFIX)))
+			continue;
+
+		if (!(ls = alloc_lockspace())) {
+			if (closedir(ls_dir))
+				log_error(closedir_err_msg);
+			return -ENOMEM;
+		}
+
+		ls->lm_type = LD_LM_DLM;
+		/*
+		 * name and vg_name hold MAX_NAME+1 bytes; strncpy() leaves the
+		 * destination unterminated when the source is MAX_NAME bytes
+		 * or longer, so terminate explicitly.
+		 */
+		strncpy(ls->name, de->d_name, MAX_NAME);
+		ls->name[MAX_NAME] = '\0';
+		strncpy(ls->vg_name, ls->name + strlen(LVM_LS_PREFIX), MAX_NAME);
+		ls->vg_name[MAX_NAME] = '\0';
+		list_add_tail(&ls->list, ls_rejoin);
+	}
+
+	if (closedir(ls_dir))
+		log_error(closedir_err_msg);
+	return 0;
+}
+
+/*
+ * Report whether dlm appears to be running, judged by whether the cluster
+ * name can be read.  Returns 1 if so, 0 otherwise.
+ */
+int lm_is_running_dlm(void)
+{
+	char cluster[MAX_ARGS+1] = { 0 };
+
+	return read_cluster_name(cluster) < 0 ? 0 : 1;
+}
--- a/daemons/lvmlockd/lvmlockd-internal.h
+++ b/daemons/lvmlockd/lvmlockd-internal.h
@@ -0,0 +1,577 @@
+/*
+ * Copyright (C) 2014-2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ */
+
+#ifndef _LVM_LVMLOCKD_INTERNAL_H
+#define _LVM_LVMLOCKD_INTERNAL_H
+
+#define MAX_NAME 64
+#define MAX_ARGS 64
+
+#define R_NAME_GL_DISABLED "_GLLK_disabled"
+#define R_NAME_GL          "GLLK"
+#define R_NAME_VG          "VGLK"
+#define S_NAME_GL_DLM      "lvm_global"
+#define LVM_LS_PREFIX      "lvm_"           /* ls name is prefix + vg_name */
+/* global lockspace name for sanlock is a vg name */
+
+/* lock manager types */
+enum {
+	LD_LM_NONE = 0,
+	LD_LM_UNUSED = 1, /* place holder so values match lib/locking/lvmlockd.h */
+	LD_LM_DLM = 2,
+	LD_LM_SANLOCK = 3,
+};
+
+/* operation types */
+enum {
+	LD_OP_HELLO = 1,
+	LD_OP_QUIT,
+	LD_OP_INIT,
+	LD_OP_FREE,
+	LD_OP_START,
+	LD_OP_STOP,
+	LD_OP_LOCK,
+	LD_OP_UPDATE,
+	LD_OP_CLOSE,
+	LD_OP_ENABLE,
+	LD_OP_DISABLE,
+	LD_OP_START_WAIT,
+	LD_OP_STOP_ALL,
+	LD_OP_DUMP_INFO,
+	LD_OP_DUMP_LOG,
+	LD_OP_RENAME_BEFORE,
+	LD_OP_RENAME_FINAL,
+	LD_OP_RUNNING_LM,
+	LD_OP_FIND_FREE_LOCK,
+	LD_OP_FORGET_VG_NAME,
+	LD_OP_KILL_VG,
+	LD_OP_DROP_VG,
+};
+
+/* resource types */
+enum {
+	LD_RT_GL = 1,
+	LD_RT_VG,
+	LD_RT_LV,
+};
+
+/* lock modes, more restrictive must be larger value */
+enum {
+	LD_LK_IV = -1,
+	LD_LK_UN = 0,
+	LD_LK_NL = 1,
+	LD_LK_SH = 2,
+	LD_LK_EX = 3,
+};
+
+/*
+ * Link node of a circular doubly linked list.  Embedded in the structures
+ * that are kept on lists; a list head is a node that links to itself when
+ * the list is empty (see INIT_LIST_HEAD() and list_empty()).
+ */
+struct list_head {
+	struct list_head *next, *prev;
+};
+
+struct client {
+	struct list_head list;
+	pthread_mutex_t mutex;
+	int pid;
+	int fd;
+	int pi;
+	uint32_t id;
+	unsigned int recv : 1;
+	unsigned int dead : 1;
+	unsigned int poll_ignore : 1;
+	char name[MAX_NAME+1];
+};
+
+#define LD_AF_PERSISTENT           0x00000001
+#define LD_AF_UNUSED               0x00000002 /* use me */
+#define LD_AF_UNLOCK_CANCEL        0x00000004
+#define LD_AF_NEXT_VERSION         0x00000008
+#define LD_AF_WAIT                 0x00000010
+#define LD_AF_FORCE                0x00000020
+#define LD_AF_EX_DISABLE           0x00000040
+#define LD_AF_ENABLE               0x00000080
+#define LD_AF_DISABLE              0x00000100
+#define LD_AF_SEARCH_LS            0x00000200
+#define LD_AF_WAIT_STARTING        0x00001000
+#define LD_AF_DUP_GL_LS            0x00002000
+#define LD_AF_INACTIVE_LS          0x00004000
+#define LD_AF_ADD_LS_ERROR         0x00008000
+#define LD_AF_ADOPT                0x00010000
+#define LD_AF_WARN_GL_REMOVED	   0x00020000
+
+/*
+ * Number of times to repeat a lock request after
+ * a lock conflict (-EAGAIN) if unspecified in the
+ * request.
+ */
+#define DEFAULT_MAX_RETRIES 4
+
+/*
+ * One requested operation together with its arguments and result fields.
+ * The op/rt/mode/lm_type members take values from the LD_OP_, LD_RT_,
+ * LD_LK_ and LD_LM_ enums above.
+ */
+struct action {
+	struct list_head list;
+	uint32_t client_id;
+	uint32_t flags;			/* LD_AF_ */
+	uint32_t version;
+	uint64_t host_id;
+	int8_t op;			/* operation type LD_OP_ */
+	int8_t rt;			/* resource type LD_RT_ */
+	int8_t mode;			/* lock mode LD_LK_ */
+	int8_t lm_type;			/* lock manager: LD_LM_DLM, LD_LM_SANLOCK */
+	int retries;
+	int max_retries;
+	int result;
+	int lm_rv;			/* return value from lm_ function */
+	char vg_uuid[64];
+	char vg_name[MAX_NAME+1];
+	char lv_name[MAX_NAME+1];
+	char lv_uuid[MAX_NAME+1];
+	char vg_args[MAX_ARGS+1];
+	char lv_args[MAX_ARGS+1];
+	char vg_sysid[MAX_NAME+1];
+};
+
+struct resource {
+	struct list_head list;		/* lockspace.resources */
+	char name[MAX_NAME+1];		/* vg name or lv name */
+	int8_t type;			/* resource type LD_RT_ */
+	int8_t mode;
+	unsigned int sh_count;		/* number of sh locks on locks list */
+	uint32_t version;
+	unsigned int lm_init : 1;	/* lm_data is initialized */
+	unsigned int adopt : 1;		/* temp flag in remove_inactive_lvs */
+	unsigned int version_zero_valid : 1;
+	struct list_head locks;
+	struct list_head actions;
+	struct val_blk *vb;
+	char lv_args[MAX_ARGS+1];
+	char lm_data[0];		/* lock manager specific data */
+};
+
+#define LD_LF_PERSISTENT 0x00000001
+
+struct lock {
+	struct list_head list;		/* resource.locks */
+	int8_t mode;			/* lock mode LD_LK_ */
+	uint32_t version;
+	uint32_t flags;			/* LD_LF_ */
+	uint32_t client_id; /* may be 0 for persistent or internal locks */
+};
+
+/*
+ * State for one lockspace: identification of the VG it belongs to, the
+ * lock manager backend in use (lm_type, with backend private data in
+ * lm_data), the thread that services it, and its actions and resources.
+ */
+struct lockspace {
+	struct list_head list;		/* lockspaces */
+	char name[MAX_NAME+1];
+	char vg_name[MAX_NAME+1];
+	char vg_uuid[64];
+	char vg_args[MAX_ARGS+1];	/* lock manager specific args */
+	char vg_sysid[MAX_NAME+1];
+	int8_t lm_type;			/* lock manager: LD_LM_DLM, LD_LM_SANLOCK */
+	void *lm_data;			/* lock manager specific data, e.g. struct lm_dlm for dlm */
+	uint64_t host_id;
+	uint64_t free_lock_offset;	/* start search for free lock here */
+
+	uint32_t start_client_id;	/* client_id that started the lockspace */
+	pthread_t thread;		/* makes synchronous lock requests */
+	pthread_cond_t cond;
+	pthread_mutex_t mutex;
+	unsigned int create_fail : 1;
+	unsigned int create_done : 1;
+	unsigned int thread_work : 1;
+	unsigned int thread_stop : 1;
+	unsigned int thread_done : 1;
+	unsigned int sanlock_gl_enabled: 1;
+	unsigned int sanlock_gl_dup: 1;
+	unsigned int free_vg: 1;
+	unsigned int kill_vg: 1;
+	unsigned int drop_vg: 1;
+
+	struct list_head actions;	/* new client actions */
+	struct list_head resources;	/* resource/lock state for gl/vg/lv */
+};
+
+#define VAL_BLK_VERSION 0x0101
+
+/*
+ * Contents of a lock value block.  The dlm backend stores the fields in
+ * little-endian byte order (cpu_to_le16/cpu_to_le32 on write).
+ */
+struct val_blk {
+	uint16_t version;	/* VAL_BLK_VERSION once written; 0 means never written */
+	uint16_t flags;
+	uint32_t r_version;	/* resource version carried with the lock */
+};
+
+/* lm_unlock flags */
+#define LMUF_FREE_VG 0x00000001
+
+/*
+ * Given ptr, a pointer to the field 'member' inside a 'type', return a
+ * pointer to the enclosing 'type'.  Uses the GNU statement-expression and
+ * typeof extensions; the typed temporary makes a mismatched ptr a
+ * compile-time diagnostic.
+ */
+#define container_of(ptr, type, member) ({                      \
+	const typeof( ((type *)0)->member ) *__mptr = (ptr);    \
+	(type *)( (char *)__mptr - offsetof(type,member) );})
+
+/* Make list an empty list: both links point back at the node itself. */
+static inline void INIT_LIST_HEAD(struct list_head *list)
+{
+	list->prev = list->next = list;
+}
+
+/* Link new between prev and next, which the caller passes as adjacent nodes. */
+static inline void __list_add(struct list_head *new,
+                              struct list_head *prev,
+                              struct list_head *next)
+{
+	new->prev = prev;
+	new->next = next;
+	prev->next = new;
+	next->prev = new;
+}
+
+/* Join prev and next directly, dropping whatever was linked between them. */
+static inline void __list_del(struct list_head *prev, struct list_head *next)
+{
+	prev->next = next;
+	next->prev = prev;
+}
+
+/* Insert new directly after head, i.e. at the front of the list. */
+static inline void list_add(struct list_head *new, struct list_head *head)
+{
+	struct list_head *first = head->next;
+
+	__list_add(new, head, first);
+}
+
+/* Insert new directly before head, i.e. at the end of the list. */
+static inline void list_add_tail(struct list_head *new, struct list_head *head)
+{
+	struct list_head *last = head->prev;
+
+	__list_add(new, last, head);
+}
+
+/* Unlink entry from its list; entry's own pointers are left unchanged. */
+static inline void list_del(struct list_head *entry)
+{
+	struct list_head *before = entry->prev;
+	struct list_head *after = entry->next;
+
+	__list_del(before, after);
+}
+
+/* Return non-zero when head has no entries (its next link is itself). */
+static inline int list_empty(const struct list_head *head)
+{
+	return head == head->next;
+}
+
+#define list_entry(ptr, type, member) \
+	container_of(ptr, type, member)
+
+#define list_first_entry(ptr, type, member) \
+	list_entry((ptr)->next, type, member)
+
+/*
+ * Iterate over every structure on the list 'head', with 'pos' pointing at
+ * the current structure and 'member' naming its embedded list_head.
+ * The next link is read from pos after the loop body runs, so the body must
+ * not unlink or free pos; use list_for_each_entry_safe() for that.
+ */
+#define list_for_each_entry(pos, head, member)                          \
+	for (pos = list_entry((head)->next, typeof(*pos), member);      \
+	     &pos->member != (head);    \
+	     pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/*
+ * Like list_for_each_entry(), but the following entry is fetched into 'n'
+ * before the loop body runs, so the body may unlink or free pos.
+ */
+#define list_for_each_entry_safe(pos, n, head, member)                  \
+	for (pos = list_entry((head)->next, typeof(*pos), member),      \
+	     n = list_entry(pos->member.next, typeof(*pos), member); \
+	     &pos->member != (head);                                    \
+	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+
+/* to improve readability */
+#define WAIT     1
+#define NO_WAIT  0
+#define FORCE    1
+#define NO_FORCE 0
+
+/*
+ * global variables
+ */
+
+/*
+ * By default EXTERN expands to "extern" and INIT(X) to nothing, so the
+ * globals below are only declared.  A source file that defines EXTERN
+ * before including this header instead gets EXTERN redefined as empty and
+ * INIT(X) as "=X", which turns the same lines into the definitions.
+ */
+#ifndef EXTERN
+#define EXTERN extern
+#define INIT(X)
+#else
+#undef EXTERN
+#define EXTERN
+#define INIT(X) =X
+#endif
+
+/*
+ * gl_type_static and gl_use_ are set by command line or config file
+ * to specify whether the global lock comes from dlm or sanlock.
+ * Without a static setting, lvmlockd will figure out where the
+ * global lock should be (but it could get mixed up in cases where
+ * both sanlock and dlm vgs exist.)
+ *
+ * gl_use_dlm means that the gl should come from lockspace gl_lsname_dlm
+ * gl_use_sanlock means that the gl should come from lockspace gl_lsname_sanlock
+ *
+ * gl_use_dlm has precedence over gl_use_sanlock, so if a node sees both
+ * dlm and sanlock vgs, it will use the dlm gl.
+ *
+ * gl_use_ is set when the first evidence of that lm_type is seen
+ * in any command.
+ *
+ * gl_lsname_sanlock is set when the first vg is seen in which an
+ * enabled gl exists, or when init_vg creates a vg with gl enabled,
+ * or when enable_gl is used.
+ *
+ * gl_lsname_sanlock is cleared when free_vg deletes a vg with gl enabled
+ * or when disable_gl matches.
+ */
+
+EXTERN int gl_running_dlm;
+EXTERN int gl_type_static;
+EXTERN int gl_use_dlm;
+EXTERN int gl_use_sanlock;
+EXTERN pthread_mutex_t gl_type_mutex;
+
+EXTERN char gl_lsname_dlm[MAX_NAME+1];
+EXTERN char gl_lsname_sanlock[MAX_NAME+1];
+
+EXTERN int daemon_test; /* run as much as possible without a live lock manager */
+EXTERN int daemon_debug;
+EXTERN int daemon_host_id;
+EXTERN const char *daemon_host_id_file;
+EXTERN int sanlock_io_timeout;
+
+/*
+ * This flag is set to 1 if we see multiple vgs with the global
+ * lock enabled.  While this is set, we return a special flag
+ * with the vg lock result indicating to the lvm command that
+ * there is a duplicate gl in the vg which should be resolved.
+ * While this is set, find_lockspace_name has the side job of
+ * counting the number of lockspaces with enabled gl's so that
+ * this can be set back to zero when the duplicates are disabled.
+ */
+EXTERN int sanlock_gl_dup;
+
+void log_level(int level, const char *fmt, ...)  __attribute__((format(printf, 2, 3)));
+#define log_debug(fmt, args...) log_level(LOG_DEBUG, fmt, ##args)
+#define log_error(fmt, args...) log_level(LOG_ERR, fmt, ##args)
+#define log_warn(fmt, args...) log_level(LOG_WARNING, fmt, ##args)
+
+struct lockspace *alloc_lockspace(void);
+int lockspaces_empty(void);
+int last_string_from_args(char *args_in, char *last);
+int version_from_args(char *args, unsigned int *major, unsigned int *minor, unsigned int *patch);
+
+
+#ifdef LOCKDDLM_SUPPORT
+
+int lm_init_vg_dlm(char *ls_name, char *vg_name, uint32_t flags, char *vg_args);
+int lm_prepare_lockspace_dlm(struct lockspace *ls);
+int lm_add_lockspace_dlm(struct lockspace *ls, int adopt);
+int lm_rem_lockspace_dlm(struct lockspace *ls, int free_vg);
+int lm_lock_dlm(struct lockspace *ls, struct resource *r, int ld_mode,
+		uint32_t *r_version, int adopt);
+int lm_convert_dlm(struct lockspace *ls, struct resource *r,
+		   int ld_mode, uint32_t r_version);
+int lm_unlock_dlm(struct lockspace *ls, struct resource *r,
+		  uint32_t r_version, uint32_t lmu_flags);
+int lm_rem_resource_dlm(struct lockspace *ls, struct resource *r);
+int lm_get_lockspaces_dlm(struct list_head *ls_rejoin);
+int lm_data_size_dlm(void);
+int lm_is_running_dlm(void);
+
+static inline int lm_support_dlm(void)
+{
+	return 1;
+}
+
+#else
+
+/*
+ * Stubs used when lvmlockd is built without LOCKDDLM_SUPPORT: every dlm
+ * entry point fails with -1, except lm_is_running_dlm() and
+ * lm_support_dlm(), which report 0.
+ */
+static inline int lm_init_vg_dlm(char *ls_name, char *vg_name, uint32_t flags, char *vg_args)
+{
+	return -1;
+}
+
+static inline int lm_prepare_lockspace_dlm(struct lockspace *ls)
+{
+	return -1;
+}
+
+static inline int lm_add_lockspace_dlm(struct lockspace *ls, int adopt)
+{
+	return -1;
+}
+
+static inline int lm_rem_lockspace_dlm(struct lockspace *ls, int free_vg)
+{
+	return -1;
+}
+
+static inline int lm_lock_dlm(struct lockspace *ls, struct resource *r, int ld_mode,
+		uint32_t *r_version, int adopt)
+{
+	return -1;
+}
+
+static inline int lm_convert_dlm(struct lockspace *ls, struct resource *r,
+		   int ld_mode, uint32_t r_version)
+{
+	return -1;
+}
+
+static inline int lm_unlock_dlm(struct lockspace *ls, struct resource *r,
+		  uint32_t r_version, uint32_t lmu_flags)
+{
+	return -1;
+}
+
+static inline int lm_rem_resource_dlm(struct lockspace *ls, struct resource *r)
+{
+	return -1;
+}
+
+static inline int lm_get_lockspaces_dlm(struct list_head *ls_rejoin)
+{
+	return -1;
+}
+
+/* NOTE(review): a "size" of -1 is returned here; confirm callers never use it when dlm is unsupported. */
+static inline int lm_data_size_dlm(void)
+{
+	return -1;
+}
+
+static inline int lm_is_running_dlm(void)
+{
+	return 0;
+}
+
+static inline int lm_support_dlm(void)
+{
+	return 0;
+}
+
+#endif /* dlm support */
+
+#ifdef LOCKDSANLOCK_SUPPORT
+
+int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args);
+int lm_init_lv_sanlock(char *ls_name, char *vg_name, char *lv_name, char *vg_args, char *lv_args, uint64_t free_offset);
+int lm_free_lv_sanlock(struct lockspace *ls, struct resource *r);
+int lm_rename_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args);
+int lm_prepare_lockspace_sanlock(struct lockspace *ls);
+int lm_add_lockspace_sanlock(struct lockspace *ls, int adopt);
+int lm_rem_lockspace_sanlock(struct lockspace *ls, int free_vg);
+int lm_lock_sanlock(struct lockspace *ls, struct resource *r, int ld_mode,
+		    uint32_t *r_version, int *retry, int adopt);
+int lm_convert_sanlock(struct lockspace *ls, struct resource *r,
+		       int ld_mode, uint32_t r_version);
+int lm_unlock_sanlock(struct lockspace *ls, struct resource *r,
+		      uint32_t r_version, uint32_t lmu_flags);
+int lm_able_gl_sanlock(struct lockspace *ls, int enable);
+int lm_ex_disable_gl_sanlock(struct lockspace *ls);
+int lm_hosts_sanlock(struct lockspace *ls, int notify);
+int lm_rem_resource_sanlock(struct lockspace *ls, struct resource *r);
+int lm_gl_is_enabled(struct lockspace *ls);
+int lm_get_lockspaces_sanlock(struct list_head *ls_rejoin);
+int lm_data_size_sanlock(void);
+int lm_is_running_sanlock(void);
+int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t *free_offset);
+
+static inline int lm_support_sanlock(void)
+{
+	return 1;
+}
+
+#else
+
+/*
+ * Stubs used when lvmlockd is built without LOCKDSANLOCK_SUPPORT: every
+ * sanlock entry point fails with -1, except lm_is_running_sanlock() and
+ * lm_support_sanlock(), which report 0.
+ */
+static inline int lm_init_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args)
+{
+	return -1;
+}
+
+static inline int lm_init_lv_sanlock(char *ls_name, char *vg_name, char *lv_name, char *vg_args, char *lv_args, uint64_t free_offset)
+{
+	return -1;
+}
+
+static inline int lm_free_lv_sanlock(struct lockspace *ls, struct resource *r)
+{
+	return -1;
+}
+
+static inline int lm_rename_vg_sanlock(char *ls_name, char *vg_name, uint32_t flags, char *vg_args)
+{
+	return -1;
+}
+
+static inline int lm_prepare_lockspace_sanlock(struct lockspace *ls)
+{
+	return -1;
+}
+
+static inline int lm_add_lockspace_sanlock(struct lockspace *ls, int adopt)
+{
+	return -1;
+}
+
+static inline int lm_rem_lockspace_sanlock(struct lockspace *ls, int free_vg)
+{
+	return -1;
+}
+
+static inline int lm_lock_sanlock(struct lockspace *ls, struct resource *r, int ld_mode,
+		    uint32_t *r_version, int *retry, int adopt)
+{
+	return -1;
+}
+
+static inline int lm_convert_sanlock(struct lockspace *ls, struct resource *r,
+		       int ld_mode, uint32_t r_version)
+{
+	return -1;
+}
+
+static inline int lm_unlock_sanlock(struct lockspace *ls, struct resource *r,
+		      uint32_t r_version, uint32_t lmu_flags)
+{
+	return -1;
+}
+
+static inline int lm_able_gl_sanlock(struct lockspace *ls, int enable)
+{
+	return -1;
+}
+
+static inline int lm_ex_disable_gl_sanlock(struct lockspace *ls)
+{
+	return -1;
+}
+
+static inline int lm_hosts_sanlock(struct lockspace *ls, int notify)
+{
+	return -1;
+}
+
+static inline int lm_rem_resource_sanlock(struct lockspace *ls, struct resource *r)
+{
+	return -1;
+}
+
+/* NOTE(review): -1 is non-zero; confirm callers do not treat this result as a plain boolean. */
+static inline int lm_gl_is_enabled(struct lockspace *ls)
+{
+	return -1;
+}
+
+static inline int lm_get_lockspaces_sanlock(struct list_head *ls_rejoin)
+{
+	return -1;
+}
+
+static inline int lm_data_size_sanlock(void)
+{
+	return -1;
+}
+
+static inline int lm_is_running_sanlock(void)
+{
+	return 0;
+}
+
+static inline int lm_find_free_lock_sanlock(struct lockspace *ls, uint64_t *free_offset)
+{
+	return -1;
+}
+
+static inline int lm_support_sanlock(void)
+{
+	return 0;
+}
+
+#endif /* sanlock support */
+
+#endif	/* _LVM_LVMLOCKD_INTERNAL_H */
--- a/daemons/lvmlockd/lvmlockd-sanlock.c
+++ b/daemons/lvmlockd/lvmlockd-sanlock.c
--- a/daemons/lvmpolld/.gitignore
+++ b/daemons/lvmpolld/.gitignore
@@ -0,0 +1 @@
+lvmpolld
--- a/daemons/lvmpolld/Makefile.in
+++ b/daemons/lvmpolld/Makefile.in
@@ -0,0 +1,48 @@
+#
+# Copyright (C) 2014-2015 Red Hat, Inc.
+#
+# This file is part of LVM2.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU Lesser General Public License v.2.1.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+# The @...@ placeholders are filled in when the Makefile is generated
+# from this Makefile.in.
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+top_builddir = @top_builddir@
+
+SOURCES = lvmpolld-core.c lvmpolld-data-utils.c lvmpolld-cmd-utils.c
+
+TARGETS = lvmpolld
+
+.PHONY: install_lvmpolld
+
+CFLOW_LIST = $(SOURCES)
+# NOTE(review): LIB_NAME is not assigned anywhere in this file; confirm
+# where its value is expected to come from.
+CFLOW_LIST_TARGET = $(LIB_NAME).cflow
+CFLOW_TARGET = lvmpolld
+
+# Everything below relies on variables and rules pulled in from make.tmpl.
+include $(top_builddir)/make.tmpl
+
+INCLUDES += -I$(top_srcdir)/libdaemon/server
+LVMLIBS = -ldaemonserver $(LVMINTERNAL_LIBS) -ldevmapper
+
+LIBS += $(PTHREAD_LIBS)
+
+LDFLAGS += -L$(top_builddir)/libdaemon/server $(DAEMON_LDFLAGS)
+CLDFLAGS += -L$(top_builddir)/libdaemon/server
+CFLAGS += $(DAEMON_CFLAGS)
+
+# Link the daemon.  Both libdaemon archives are prerequisites, so a rebuild
+# of either one relinks lvmpolld.
+lvmpolld: $(OBJECTS) $(top_builddir)/libdaemon/client/libdaemonclient.a \
+		    $(top_builddir)/libdaemon/server/libdaemonserver.a
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJECTS) $(LVMLIBS) $(LIBS)
+
+# Install the binary under $(sbindir), keeping its file name.
+install_lvmpolld: lvmpolld
+	$(INSTALL_PROGRAM) -D $< $(sbindir)/$(<F)
+
+# Hook the daemon into the install_lvm2 and install targets.
+install_lvm2: install_lvmpolld
+
+install: install_lvm2
--- a/daemons/lvmpolld/lvmpolld-cmd-utils.c
+++ b/daemons/lvmpolld/lvmpolld-cmd-utils.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright (C) 2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "lvmpolld-common.h"
+
+/* extract this info from autoconf/automake files */
+#define LVPOLL_CMD "lvpoll"
+
+#define MIN_ARGV_SIZE  8
+
+/*
+ * Request names indexed by enum poll_type.  Used for the value of the
+ * lvpoll "--polloperation" argument and by polling_op().
+ * (The const qualifier used to be spelled twice; once is enough and avoids
+ * duplicate-qualifier diagnostics.)
+ */
+static const char *const polling_ops[] = { [PVMOVE] = LVMPD_REQ_PVMOVE,
+					   [CONVERT] = LVMPD_REQ_CONVERT,
+					   [MERGE] = LVMPD_REQ_MERGE,
+					   [MERGE_THIN] = LVMPD_REQ_MERGE_THIN };
+
+const char *polling_op(enum poll_type type)
+{
+	return type < POLL_TYPE_MAX ? polling_ops[type] : "<undefined>";
+}
+
+/*
+ * Store str at index *ind of the array *cmdargv and advance *ind.
+ * The array is managed in chunks of MIN_ARGV_SIZE entries: whenever *ind
+ * reaches a non-zero multiple of the chunk size the array is reallocated
+ * with room for one more chunk.
+ * Returns 1 on success, 0 when the reallocation fails (array and index are
+ * left untouched in that case).
+ */
+static int add_to_cmd_arr(const char ***cmdargv, const char *str, unsigned *ind)
+{
+	const unsigned pos = *ind;
+	const char **grown;
+
+	if (pos && (pos % MIN_ARGV_SIZE) == 0) {
+		grown = dm_realloc(*cmdargv, (pos / MIN_ARGV_SIZE + 1) * MIN_ARGV_SIZE * sizeof(char *));
+		if (!grown)
+			return 0;
+		*cmdargv = grown;
+	}
+
+	(*cmdargv)[pos] = str;
+	*ind = pos + 1;
+
+	return 1;
+}
+
+/*
+ * Build the NULL-terminated argument vector for the lvpoll command:
+ *
+ *   <lvm_binary> lvpoll [--interval <sinterval>] [--abort]
+ *       [--handlemissingpvs] --polloperation <op> <lvname> -An
+ *
+ * Only the array is allocated here; the strings it points to are borrowed.
+ * Returns the array, or NULL when an allocation fails.
+ */
+const char **cmdargv_ctr(const struct lvmpolld_lv *pdlv, const char *lvm_binary, unsigned abort_polling, unsigned handle_missing_pvs)
+{
+	unsigned n = 0;
+	int ok;
+	const char **argv = dm_malloc(MIN_ARGV_SIZE * sizeof(char *));
+
+	if (!argv)
+		return NULL;
+
+	/* path to lvm2 binary, then the cmd to execute */
+	ok = add_to_cmd_arr(&argv, lvm_binary, &n) &&
+	     add_to_cmd_arr(&argv, LVPOLL_CMD, &n);
+
+	/* transfer internal polling interval */
+	if (ok && pdlv->sinterval)
+		ok = add_to_cmd_arr(&argv, "--interval", &n) &&
+		     add_to_cmd_arr(&argv, pdlv->sinterval, &n);
+
+	/* pass abort param */
+	if (ok && abort_polling)
+		ok = add_to_cmd_arr(&argv, "--abort", &n);
+
+	/* pass handle-missing-pvs. used by mirror polling operation */
+	if (ok && handle_missing_pvs)
+		ok = add_to_cmd_arr(&argv, "--handlemissingpvs", &n);
+
+	/*
+	 * Remaining mandatory arguments, in order:
+	 *  - one of: "convert", "pvmove", "merge", "merge_thin"
+	 *  - vg/lv name
+	 *  - disable metadata backup
+	 *  - terminating NULL
+	 */
+	ok = ok &&
+	     add_to_cmd_arr(&argv, "--polloperation", &n) &&
+	     add_to_cmd_arr(&argv, polling_ops[pdlv->type], &n) &&
+	     add_to_cmd_arr(&argv, pdlv->lvname, &n) &&
+	     add_to_cmd_arr(&argv, "-An", &n) &&
+	     add_to_cmd_arr(&argv, NULL, &n);
+
+	if (!ok) {
+		dm_free(argv);
+		return NULL;
+	}
+
+	return argv;
+}
+
+/*
+ * Append every entry of the process environment to *cmd_envp, skipping
+ * the entries that start with the exclude prefix.
+ * Returns 1 on success, 0 when there is no environment or an append fails.
+ *
+ * FIXME: in fact exclude should be va list
+ */
+static int copy_env(const char ***cmd_envp, unsigned *i, const char *exclude)
+{
+	const char * const *var = (const char * const*) environ;
+
+	if (!var)
+		return 0;
+
+	for (; *var; var++) {
+		/* prefix match: this variable is filtered out */
+		if (!strncmp(*var, exclude, strlen(exclude)))
+			continue;
+
+		if (!add_to_cmd_arr(cmd_envp, *var, i))
+			return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Build the NULL-terminated environment for the lvpoll command: a copy of
+ * lvmpolld's own environment without any LVM_SYSTEM_DIR entry, followed by
+ * the per client LVM_SYSTEM_DIR variable when pdlv carries a non-empty one.
+ * Only the array is allocated; the strings are borrowed.
+ * Returns the array, or NULL on failure.
+ */
+const char **cmdenvp_ctr(const struct lvmpolld_lv *pdlv)
+{
+	unsigned count = 0;
+	const char **envp = dm_malloc(MIN_ARGV_SIZE * sizeof(char *));
+
+	if (!envp)
+		return NULL;
+
+	if (copy_env(&envp, &count, "LVM_SYSTEM_DIR=") &&
+	    (!*pdlv->lvm_system_dir_env ||
+	     add_to_cmd_arr(&envp, pdlv->lvm_system_dir_env, &count)) &&
+	    add_to_cmd_arr(&envp, NULL, &count))
+		return envp;
+
+	dm_free(envp);
+	return NULL;
+}
--- a/daemons/lvmpolld/lvmpolld-cmd-utils.h
+++ b/daemons/lvmpolld/lvmpolld-cmd-utils.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _LVM_LVMPOLLD_CMD_UTILS_H
+#define _LVM_LVMPOLLD_CMD_UTILS_H
+
+#include "lvmpolld-data-utils.h"
+
+/*
+ * Constructors for the NULL-terminated argument vector and environment
+ * used to execute the lvpoll command for pdlv.  Both return NULL on failure.
+ * The third cmdargv_ctr() parameter is named abort_polling, as in the
+ * definition, so that it does not shadow abort(3).
+ */
+const char **cmdargv_ctr(const struct lvmpolld_lv *pdlv, const char *lvm_binary, unsigned abort_polling, unsigned handle_missing_pvs);
+const char **cmdenvp_ctr(const struct lvmpolld_lv *pdlv);
+
+/* Request name of a polling operation type; "<undefined>" when out of range. */
+const char *polling_op(enum poll_type type);
+
+#endif /* _LVM_LVMPOLLD_CMD_UTILS_H */
--- a/daemons/lvmpolld/lvmpolld-common.h
+++ b/daemons/lvmpolld/lvmpolld-common.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2006 Rackable Systems All rights reserved.  
+ * Copyright (C) 2010-2015 Red Hat, Inc. All rights reserved.
 *
 * This file is part of LVM2.
 *
@@ -12,22 +12,20 @@
 * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

-#ifndef _LVM_TIMESTAMP_H
-#define _LVM_TIMESTAMP_H
-
-struct timestamp;
-
-struct timestamp *get_timestamp(void);
-
-/* cmp_timestamp: Compare two timestamps
- * 
- * Return: -1 if t1 is less than t2
- *  	    0 if t1 is equal to t2
- *          1 if t1 is greater than t2
+/*
+ * This file must be included first by every lvmpolld source file.
 */
-int cmp_timestamp(struct timestamp *t1, struct timestamp *t2);
+#ifndef _LVM_LVMPOLLD_COMMON_H
+#define _LVM_LVMPOLLD_COMMON_H

-void destroy_timestamp(struct timestamp *t);
+#define _REENTRANT

-#endif /* _LVM_TIMESTAMP_H */
+#include "tool.h"

+#include "lvmpolld-cmd-utils.h"
+#include "lvmpolld-protocol.h"
+
+#include <assert.h>
+#include <errno.h>
+
+#endif	/* _LVM_LVMPOLLD_COMMON_H */
--- a/daemons/lvmpolld/lvmpolld-core.c
+++ b/daemons/lvmpolld/lvmpolld-core.c
@@ -0,0 +1,985 @@
+/*
+ * Copyright (C) 2014-2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "lvmpolld-common.h"
+
+#include "lvm-version.h"
+#include "daemon-server.h"
+#include "daemon-log.h"
+
+#include <getopt.h>
+#include <poll.h>
+#include <wait.h>
+
+#define LVMPOLLD_SOCKET DEFAULT_RUN_DIR "/lvmpolld.socket"
+
+#define PD_LOG_PREFIX "LVMPOLLD"
+#define LVM2_LOG_PREFIX "\tLVPOLL"
+
+/* predefined reason for response = "failed" case */
+#define REASON_REQ_NOT_IMPLEMENTED "request not implemented"
+#define REASON_MISSING_LVID "request requires lvid set"
+#define REASON_MISSING_LVNAME "request requires lvname set"
+#define REASON_MISSING_VGNAME "request requires vgname set"
+#define REASON_POLLING_FAILED "polling of lvm command failed"
+#define REASON_ILLEGAL_ABORT_REQUEST "abort only supported with PVMOVE polling operation"
+#define REASON_DIFFERENT_OPERATION_IN_PROGRESS "Different operation on LV already in progress"
+#define REASON_INVALID_INTERVAL "request requires interval set"
+#define REASON_ENOMEM "not enough memory"
+
+/* Daemon-wide lvmpolld state; main() installs it as the daemon's private data. */
+struct lvmpolld_state {
+	daemon_idle *idle; /* set by main() only for a non-zero --timeout, NULL otherwise */
+	log_state *log; /* copied from the daemon_state in _init() */
+	const char *log_config; /* -l/--log argument, "" by default */
+	const char *lvm_binary; /* -B/--binary argument; _init() falls back to LVM_PATH */
+
+	struct lvmpolld_store *id_to_pdlv_abort; /* store used by requests with the abort parameter set */
+	struct lvmpolld_store *id_to_pdlv_poll; /* store used by all other requests */
+};
+
+static pthread_key_t key;
+
+/*
+ * Return a message for errnum, using data->buf as scratch space.
+ * Which strerror_r() flavour is called depends on the feature-test macros;
+ * when neither can be identified an empty string is returned instead.
+ */
+static const char *_strerror_r(int errnum, struct lvmpolld_thread_data *data)
+{
+#ifdef _GNU_SOURCE
+	return strerror_r(errnum, data->buf, sizeof(data->buf)); /* never returns NULL */
+#elif (_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600)
+	return strerror_r(errnum, data->buf, sizeof(data->buf)) ? "" : data->buf; /* non-zero result: fall back to "" */
+#else
+#	warning "Can't decide proper strerror_r implementation. lvmpolld will not issue specific system error messages"
+	return "";
+#endif
+}
+
+/*
+ * Print the command line synopsis and the option summary to file.
+ * prog is the program name shown in both synopsis lines.
+ */
+static void _usage(const char *prog, FILE *file)
+{
+	fprintf(file, "Usage:\n"
+		"%s [-V] [-h] [-f] [-l {all|wire|debug}] [-s path] [-B path] [-p path] [-t secs]\n"
+		"%s --dump [-s path]\n"
+		"   -V|--version     Show version info\n"
+		"   -h|--help        Show this help information\n"
+		"   -f|--foreground  Don't fork, run in the foreground\n"
+		"   --dump           Dump full lvmpolld state\n"
+		"   -l|--log         Logging message level (-l {all|wire|debug})\n"
+		"   -p|--pidfile     Set path to the pidfile\n"
+		"   -s|--socket      Set path to the communication socket\n"
+		"   -B|--binary      Path to lvm2 binary\n"
+		"   -t|--timeout     Time to wait in seconds before shutdown on idle (missing or 0 = infinite)\n\n", prog, prog);
+}
+
+/*
+ * Daemon init callback: set up logging, the per-thread data key and both
+ * pdlv stores, resolve the lvm binary path (LVM_PATH unless one was given)
+ * and verify it is executable.  Marks the daemon idle when idle tracking is
+ * enabled.  Returns 1 on success, 0 on any failure.
+ */
+static int _init(struct daemon_state *s)
+{
+	struct lvmpolld_state *state = s->private;
+
+	state->log = s->log;
+
+	if (!daemon_log_parse(state->log, DAEMON_LOG_OUTLET_STDERR, state->log_config, 1))
+		return 0;
+
+	if (pthread_key_create(&key, lvmpolld_thread_data_destroy)) {
+		FATAL(state, "%s: %s", PD_LOG_PREFIX, "Failed to create pthread key");
+		return 0;
+	}
+
+	state->id_to_pdlv_poll = pdst_init("polling");
+	state->id_to_pdlv_abort = pdst_init("abort");
+
+	if (!(state->id_to_pdlv_poll && state->id_to_pdlv_abort)) {
+		FATAL(state, "%s: %s", PD_LOG_PREFIX, "Failed to allocate internal data structures");
+		return 0;
+	}
+
+	/* no binary given on the command line: use the built-in default */
+	if (!state->lvm_binary)
+		state->lvm_binary = LVM_PATH;
+
+	if (access(state->lvm_binary, X_OK) != 0) {
+		FATAL(state, "%s: %s %s", PD_LOG_PREFIX, "Execute access rights denied on", state->lvm_binary);
+		return 0;
+	}
+
+	if (state->idle)
+		state->idle->is_idle = 1;
+
+	return 1;
+}
+
+/* Lock both stores: the poll store first, then the abort store. */
+static void _lvmpolld_stores_lock(struct lvmpolld_state *ls)
+{
+	pdst_lock(ls->id_to_pdlv_poll);
+	pdst_lock(ls->id_to_pdlv_abort);
+}
+
+/* Unlock both stores in the reverse of the locking order: abort store first, then poll store. */
+static void _lvmpolld_stores_unlock(struct lvmpolld_state *ls)
+{
+	pdst_unlock(ls->id_to_pdlv_abort);
+	pdst_unlock(ls->id_to_pdlv_poll);
+}
+
+/*
+ * Take the global lock: lock both stores, then call
+ * pdst_locked_lock_all_pdlvs() on the poll store and on the abort store.
+ */
+static void _lvmpolld_global_lock(struct lvmpolld_state *ls)
+{
+	_lvmpolld_stores_lock(ls);
+
+	pdst_locked_lock_all_pdlvs(ls->id_to_pdlv_poll);
+	pdst_locked_lock_all_pdlvs(ls->id_to_pdlv_abort);
+}
+
+/*
+ * Release the global lock in the reverse order of _lvmpolld_global_lock():
+ * the pdlvs of the abort store, the pdlvs of the poll store, then both stores.
+ */
+static void _lvmpolld_global_unlock(struct lvmpolld_state *ls)
+{
+	pdst_locked_unlock_all_pdlvs(ls->id_to_pdlv_abort);
+	pdst_locked_unlock_all_pdlvs(ls->id_to_pdlv_poll);
+
+	_lvmpolld_stores_unlock(ls);
+}
+
+/*
+ * Daemon shutdown callback: send cancel requests to everything registered
+ * in both stores, wait (checking every .25 sec) until neither store reports
+ * an active entry, then destroy all pdlvs, both stores and the per-thread
+ * data key.  Always returns 1.
+ */
+static int _fini(struct daemon_state *s)
+{
+	struct lvmpolld_state *ls = s->private;
+	const struct timespec pause = { .tv_nsec = 250000000 }; /* .25 sec */
+	int busy;
+
+	DEBUGLOG(s, "fini");
+
+	DEBUGLOG(s, "sending cancel requests");
+
+	_lvmpolld_global_lock(ls);
+	pdst_locked_send_cancel(ls->id_to_pdlv_poll);
+	pdst_locked_send_cancel(ls->id_to_pdlv_abort);
+	_lvmpolld_global_unlock(ls);
+
+	DEBUGLOG(s, "waiting for background threads to finish");
+
+	for (;;) {
+		_lvmpolld_stores_lock(ls);
+		busy = pdst_locked_get_active_count(ls->id_to_pdlv_poll) ||
+		       pdst_locked_get_active_count(ls->id_to_pdlv_abort);
+		_lvmpolld_stores_unlock(ls);
+
+		if (!busy)
+			break;
+
+		nanosleep(&pause, NULL);
+	}
+
+	DEBUGLOG(s, "destroying internal data structures");
+
+	_lvmpolld_stores_lock(ls);
+	pdst_locked_destroy_all_pdlvs(ls->id_to_pdlv_poll);
+	pdst_locked_destroy_all_pdlvs(ls->id_to_pdlv_abort);
+	_lvmpolld_stores_unlock(ls);
+
+	pdst_destroy(ls->id_to_pdlv_poll);
+	pdst_destroy(ls->id_to_pdlv_abort);
+
+	pthread_key_delete(key);
+
+	return 1;
+}
+
+/* Build a simple daemon reply with response res and a "reason" field set to reason. */
+static response reply(const char *res, const char *reason)
+{
+	return daemon_reply_simple(res, "reason = %s", reason, NULL);
+}
+
+/*
+ * Read one line into data->line from the command's error stream (err != 0)
+ * or output stream (err == 0), replacing a trailing newline with NUL.
+ * Returns 1 when a line was read, 0 otherwise.
+ */
+static int read_single_line(struct lvmpolld_thread_data *data, int err)
+{
+	FILE *stream = err ? data->ferr : data->fout;
+	ssize_t len = getline(&data->line, &data->line_size, stream);
+
+	if (len <= 0)
+		return 0;
+
+	if (data->line[len - 1] == '\n')
+		data->line[len - 1] = '\0';
+
+	return 1;
+}
+
+/*
+ * Recompute the idle flag: the daemon is idle when neither store reports
+ * an active entry.  Does nothing when idle tracking is not enabled.
+ */
+static void update_idle_state(struct lvmpolld_state *ls)
+{
+	if (ls->idle) {
+		_lvmpolld_stores_lock(ls);
+
+		ls->idle->is_idle = !(pdst_locked_get_active_count(ls->id_to_pdlv_poll) ||
+				      pdst_locked_get_active_count(ls->id_to_pdlv_abort));
+
+		_lvmpolld_stores_unlock(ls);
+
+		DEBUGLOG(ls, "%s: %s %s%s", PD_LOG_PREFIX, "daemon is", ls->idle->is_idle ? "" : "not ", "idle");
+	}
+}
+
+/* make this configurable */
+#define MAX_TIMEOUT 2
+
+/*
+ * Collect and log the output of the running lvm command until it exits.
+ *
+ * The read ends of both pipes are wrapped in stdio streams and polled with
+ * the pdlv's timeout.  More than MAX_TIMEOUT consecutive timeouts are
+ * treated as an unresponsive command.  After each round of output the
+ * command is checked with waitpid(): non-blocking while at least one pipe
+ * is still being polled, blocking once both are done.  When the command
+ * has exited, the remaining buffered lines are logged and the exit code or
+ * terminating signal is stored in the pdlv.
+ *
+ * Returns 0 on success (command state recorded), 1 on any failure.
+ *
+ * The reads are performed outside of assert(): an assert() argument is not
+ * evaluated at all when NDEBUG is defined, and a failed read must not abort
+ * the whole daemon.  A descriptor whose read fails is no longer polled.
+ */
+static int poll_for_output(struct lvmpolld_lv *pdlv, struct lvmpolld_thread_data *data)
+{
+	int ch_stat, r, err = 1, fds_count = 2, timeout = 0;
+	pid_t pid;
+	struct lvmpolld_cmd_stat cmd_state = { .retcode = -1, .signal = 0 };
+	struct pollfd fds[] = { { .fd = data->outpipe[0], .events = POLLIN },
+				{ .fd = data->errpipe[0], .events = POLLIN } };
+
+	if (!(data->fout = fdopen(data->outpipe[0], "r")) || !(data->ferr = fdopen(data->errpipe[0], "r"))) {
+		ERROR(pdlv->ls, "%s: %s: (%d) %s", PD_LOG_PREFIX, "failed to open file stream",
+		      errno, _strerror_r(errno, data));
+		goto out;
+	}
+
+	while (1) {
+		do {
+			r = poll(fds, 2, pdlv_get_timeout(pdlv) * 1000);
+		} while (r < 0 && errno == EINTR);
+
+		DEBUGLOG(pdlv->ls, "%s: %s %d", PD_LOG_PREFIX, "poll() returned", r);
+		if (r < 0) {
+			ERROR(pdlv->ls, "%s: %s (PID %d) failed: (%d) %s",
+			      PD_LOG_PREFIX, "poll() for LVM2 cmd", pdlv->cmd_pid,
+			      errno, _strerror_r(errno, data));
+			goto out;
+		} else if (!r) {
+			timeout++;
+
+			WARN(pdlv->ls, "%s: %s (PID %d) %s", PD_LOG_PREFIX,
+			     "polling for output of the lvm cmd", pdlv->cmd_pid,
+			     "has timed out");
+
+			if (timeout > MAX_TIMEOUT) {
+				ERROR(pdlv->ls, "%s: %s (PID %d) (no output for %d seconds)",
+				      PD_LOG_PREFIX,
+				      "LVM2 cmd is unresponsive too long",
+				      pdlv->cmd_pid,
+				      timeout * pdlv_get_timeout(pdlv));
+				goto out;
+			}
+
+			continue; /* while(1) */
+		}
+
+		/* some event arrived: restart counting consecutive timeouts */
+		timeout = 0;
+
+		/* handle the command's STDOUT */
+		if (fds[0].revents & POLLIN) {
+			DEBUGLOG(pdlv->ls, "%s: %s", PD_LOG_PREFIX, "caught input data in STDOUT");
+
+			/* may block indef. anyway */
+			if (read_single_line(data, 0))
+				INFO(pdlv->ls, "%s: PID %d: %s: '%s'", LVM2_LOG_PREFIX,
+				     pdlv->cmd_pid, "STDOUT", data->line);
+			else {
+				WARN(pdlv->ls, "%s: %s", PD_LOG_PREFIX, "failed to read command's STDOUT");
+				/* nothing more can be read: stop polling this descriptor */
+				fds[0].fd = -1;
+				fds_count--;
+			}
+		} else if (fds[0].revents) {
+			if (fds[0].revents & POLLHUP)
+				DEBUGLOG(pdlv->ls, "%s: %s", PD_LOG_PREFIX, "caught POLLHUP");
+			else
+				WARN(pdlv->ls, "%s: %s", PD_LOG_PREFIX, "poll for command's STDOUT failed");
+
+			/* a negative fd makes poll() ignore this entry */
+			fds[0].fd = -1;
+			fds_count--;
+		}
+
+		/* handle the command's STDERR */
+		if (fds[1].revents & POLLIN) {
+			DEBUGLOG(pdlv->ls, "%s: %s", PD_LOG_PREFIX,
+				 "caught input data in STDERR");
+
+			/* may block indef. anyway */
+			if (read_single_line(data, 1))
+				INFO(pdlv->ls, "%s: PID %d: %s: '%s'", LVM2_LOG_PREFIX,
+				     pdlv->cmd_pid, "STDERR", data->line);
+			else {
+				WARN(pdlv->ls, "%s: %s", PD_LOG_PREFIX, "failed to read command's STDERR");
+				/* nothing more can be read: stop polling this descriptor */
+				fds[1].fd = -1;
+				fds_count--;
+			}
+		} else if (fds[1].revents) {
+			if (fds[1].revents & POLLHUP)
+				DEBUGLOG(pdlv->ls, "%s: %s", PD_LOG_PREFIX, "caught err POLLHUP");
+			else
+				WARN(pdlv->ls, "%s: %s", PD_LOG_PREFIX, "poll for command's STDERR failed");
+
+			fds[1].fd = -1;
+			fds_count--;
+		}
+
+		do {
+			/*
+			 * fds_count == 0 means polling reached EOF
+			 * or received error on both descriptors.
+			 * In such case, just wait for command to finish
+			 */
+			pid = waitpid(pdlv->cmd_pid, &ch_stat, fds_count ? WNOHANG : 0);
+		} while (pid < 0 && errno == EINTR);
+
+		if (pid) {
+			if (pid < 0) {
+				ERROR(pdlv->ls, "%s: %s (PID %d) failed: (%d) %s",
+				      PD_LOG_PREFIX, "waitpid() for lvm2 cmd",
+				      pdlv->cmd_pid, errno,
+				      _strerror_r(errno, data));
+				goto out;
+			}
+			DEBUGLOG(pdlv->ls, "%s: %s", PD_LOG_PREFIX, "child exited");
+			break;
+		}
+	} /* while(1) */
+
+	DEBUGLOG(pdlv->ls, "%s: %s", PD_LOG_PREFIX, "about to collect remaining lines");
+	if (fds[0].fd >= 0)
+		while (read_single_line(data, 0)) {
+			assert(r > 0);
+			INFO(pdlv->ls, "%s: PID %d: %s: %s", LVM2_LOG_PREFIX, pdlv->cmd_pid, "STDOUT", data->line);
+		}
+	if (fds[1].fd >= 0)
+		while (read_single_line(data, 1)) {
+			assert(r > 0);
+			INFO(pdlv->ls, "%s: PID %d: %s: %s", LVM2_LOG_PREFIX, pdlv->cmd_pid, "STDERR", data->line);
+		}
+
+	if (WIFEXITED(ch_stat)) {
+		INFO(pdlv->ls, "%s: %s (PID %d) %s (%d)", PD_LOG_PREFIX,
+		     "lvm2 cmd", pdlv->cmd_pid, "exited with", WEXITSTATUS(ch_stat));
+		cmd_state.retcode = WEXITSTATUS(ch_stat);
+	} else if (WIFSIGNALED(ch_stat)) {
+		WARN(pdlv->ls, "%s: %s (PID %d) %s (%d)", PD_LOG_PREFIX,
+		     "lvm2 cmd", pdlv->cmd_pid, "got terminated by signal",
+		     WTERMSIG(ch_stat));
+		cmd_state.signal = WTERMSIG(ch_stat);
+	}
+
+	err = 0;
+out:
+	if (!err)
+		pdlv_set_cmd_state(pdlv, &cmd_state);
+
+	return err;
+}
+
+/* Log every string of a NULL-terminated array at debug level; a NULL array is ignored. */
+static void debug_print(struct lvmpolld_state *ls, const char * const* ptr)
+{
+	const char * const *entry;
+
+	if (!ptr)
+		return;
+
+	for (entry = ptr; *entry; entry++)
+		DEBUGLOG(ls, "%s: %s", PD_LOG_PREFIX, *entry);
+}
+
+/*
+ * Thread routine of the monitoring thread started for one pdlv (args).
+ *
+ * Sets up the per-thread data, forks, and in the child redirects stdout and
+ * stderr into the pipes before executing the prepared lvpoll command.  The
+ * parent closes the write ends of the pipes and collects the command's
+ * output via poll_for_output().
+ *
+ * On the way out, with the store lock held, the pdlv is flagged as finished
+ * (and as failed when anything went wrong) and the store's counter is
+ * decremented; afterwards the per-thread data is destroyed and the daemon's
+ * idle state refreshed.  Always returns NULL.
+ */
+static void *fork_and_poll(void *args)
+{
+	int outfd, errfd, state;
+	struct lvmpolld_thread_data *data;
+	pid_t r;
+
+	int error = 1;
+	struct lvmpolld_lv *pdlv = (struct lvmpolld_lv *) args;
+	struct lvmpolld_state *ls = pdlv->ls;
+
+	/* cancellation is disabled while the thread data is created and registered */
+	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &state);
+	data = lvmpolld_thread_data_constructor(pdlv);
+	pthread_setspecific(key, data);
+	pthread_setcancelstate(state, &state);
+
+	if (!data) {
+		ERROR(ls, "%s: %s", PD_LOG_PREFIX, "Failed to initialize per-thread data");
+		goto err;
+	}
+
+	DEBUGLOG(ls, "%s: %s", PD_LOG_PREFIX, "cmd line arguments:");
+	debug_print(ls, pdlv->cmdargv);
+	DEBUGLOG(ls, "%s: %s", PD_LOG_PREFIX, "---end---");
+
+	DEBUGLOG(ls, "%s: %s", PD_LOG_PREFIX, "cmd environment variables:");
+	debug_print(ls, pdlv->cmdenvp);
+	DEBUGLOG(ls, "%s: %s", PD_LOG_PREFIX, "---end---");
+
+	/* write ends of the pipes; they become the child's stdout and stderr */
+	outfd = data->outpipe[1];
+	errfd = data->errpipe[1];
+
+	r = fork();
+	if (!r) {
+		/* child */
+		/* !!! Do not touch any posix thread primitives !!! */
+
+		if ((dup2(outfd, STDOUT_FILENO ) != STDOUT_FILENO) ||
+		    (dup2(errfd, STDERR_FILENO ) != STDERR_FILENO))
+			_exit(LVMPD_RET_DUP_FAILED);
+
+		execve(*(pdlv->cmdargv), (char *const *)pdlv->cmdargv, (char *const *)pdlv->cmdenvp);
+
+		/* reached only when execve() failed */
+		_exit(LVMPD_RET_EXC_FAILED);
+	} else {
+		/* parent */
+		if (r == -1) {
+			ERROR(ls, "%s: %s: (%d) %s", PD_LOG_PREFIX, "fork failed",
+			      errno, _strerror_r(errno, data));
+			goto err;
+		}
+
+		INFO(ls, "%s: LVM2 cmd \"%s\" (PID: %d)", PD_LOG_PREFIX, *(pdlv->cmdargv), r);
+
+		pdlv->cmd_pid = r;
+
+		/* failure to close write end of any pipe will result in broken polling */
+		if (close(data->outpipe[1])) {
+			ERROR(ls, "%s: %s: (%d) %s", PD_LOG_PREFIX, "failed to close write end of pipe",
+			      errno, _strerror_r(errno, data));
+			goto err;
+		}
+		data->outpipe[1] = -1;
+
+		if (close(data->errpipe[1])) {
+			ERROR(ls, "%s: %s: (%d) %s", PD_LOG_PREFIX, "failed to close write end of err pipe",
+			      errno, _strerror_r(errno, data));
+			goto err;
+		}
+		data->errpipe[1] = -1;
+
+		error = poll_for_output(pdlv, data);
+		DEBUGLOG(ls, "%s: %s", PD_LOG_PREFIX, "polling for lvpoll output has finished");
+	}
+
+err:
+	/* from here on r holds the pid to reap at the very end, 0 for none */
+	r = 0;
+
+	pdst_lock(pdlv->pdst);
+
+	if (error) {
+		/* last reader is responsible for pdlv cleanup */
+		r = pdlv->cmd_pid;
+		pdlv_set_error(pdlv, 1);
+	}
+
+	pdlv_set_polling_finished(pdlv, 1);
+	if (data)
+		data->pdlv = NULL;
+
+	pdst_locked_dec(pdlv->pdst);
+
+	pdst_unlock(pdlv->pdst);
+
+	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &state);
+	lvmpolld_thread_data_destroy(data);
+	pthread_setspecific(key, NULL);
+	pthread_setcancelstate(state, &state);
+
+	update_idle_state(ls);
+
+	/*
+	 * This is unfortunate case where we
+	 * know nothing about state of lvm cmd and
+	 * (eventually) ongoing progress.
+	 *
+	 * harvest zombies
+	 */
+	if (r)
+		while(waitpid(r, NULL, 0) < 0 && errno == EINTR);
+
+	return NULL;
+}
+
+/*
+ * Handle a progress request for one LV.
+ *
+ * The LV is looked up by id (sysdir + lvid) in the abort store when the
+ * abort parameter is set, in the poll store otherwise.  Replies:
+ *  - LVMPD_RESP_NOT_FOUND when the store has no such entry,
+ *  - LVMPD_RESP_IN_PROGRESS while polling is still running,
+ *  - LVMPD_RESP_FINISHED with the command's return code or terminating
+ *    signal once polling has finished,
+ *  - LVMPD_RESP_FAILED when lvid is missing, the id cannot be allocated or
+ *    polling ended with an error.
+ * An entry that has failed or finished is removed from the store and
+ * destroyed by this call.
+ */
+static response progress_info(client_handle h, struct lvmpolld_state *ls, request req)
+{
+	char *id;
+	struct lvmpolld_lv *pdlv;
+	struct lvmpolld_store *pdst;
+	struct lvmpolld_lv_state st;
+	response r;
+	const char *lvid = daemon_request_str(req, LVMPD_PARM_LVID, NULL);
+	const char *sysdir = daemon_request_str(req, LVMPD_PARM_SYSDIR, NULL);
+	unsigned abort_polling = daemon_request_int(req, LVMPD_PARM_ABORT, 0);
+
+	if (!lvid)
+		return reply(LVMPD_RESP_FAILED, REASON_MISSING_LVID);
+
+	id = construct_id(sysdir, lvid);
+	if (!id) {
+		ERROR(ls, "%s: %s", PD_LOG_PREFIX, "progress_info request failed to construct ID.");
+		return reply(LVMPD_RESP_FAILED, REASON_ENOMEM);
+	}
+
+	DEBUGLOG(ls, "%s: %s: %s", PD_LOG_PREFIX, "ID", id);
+
+	pdst = abort_polling ? ls->id_to_pdlv_abort : ls->id_to_pdlv_poll;
+
+	pdst_lock(pdst);
+
+	pdlv = pdst_locked_lookup(pdst, id);
+	if (pdlv) {
+		/*
+		 * with store lock held, I'm the only reader accessing the pdlv
+		 */
+		st = pdlv_get_status(pdlv);
+
+		if (st.error || st.polling_finished) {
+			INFO(ls, "%s: %s %s", PD_LOG_PREFIX,
+			     "Polling finished. Removing related data structure for LV",
+			     lvid);
+			pdst_locked_remove(pdst, id);
+			pdlv_destroy(pdlv);
+		}
+	}
+	/* pdlv must not be dereferenced from now on */
+
+	pdst_unlock(pdst);
+
+	dm_free(id);
+
+	/* pdlv is used only as a "was found" flag below; st is the copy taken under the lock */
+	if (pdlv) {
+		if (st.error)
+			return reply(LVMPD_RESP_FAILED, REASON_POLLING_FAILED);
+
+		if (st.polling_finished)
+			r = daemon_reply_simple(LVMPD_RESP_FINISHED,
+						"reason = %s", st.cmd_state.signal ? LVMPD_REAS_SIGNAL : LVMPD_REAS_RETCODE,
+						LVMPD_PARM_VALUE " = %d", (int64_t)(st.cmd_state.signal ?: st.cmd_state.retcode),
+						NULL);
+		else
+			r = daemon_reply_simple(LVMPD_RESP_IN_PROGRESS, NULL);
+	}
+	else
+		r = daemon_reply_simple(LVMPD_RESP_NOT_FOUND, NULL);
+
+	return r;
+}
+
+/*
+ * Create a pdlv for the request and attach the argument vector and the
+ * environment of the lvpoll command to it.
+ * Returns the new pdlv, or NULL (after logging the reason) when the pdlv,
+ * its argv or its environment cannot be constructed; a partially built
+ * pdlv is destroyed before returning.
+ */
+static struct lvmpolld_lv *construct_pdlv(request req, struct lvmpolld_state *ls,
+				     struct lvmpolld_store *pdst,
+				     const char *interval, const char *id,
+				     const char *vgname, const char *lvname,
+				     const char *sysdir, enum poll_type type,
+				     unsigned abort_polling, unsigned uinterval)
+{
+	const char *failure;
+	const char **argv, **envp;
+	struct lvmpolld_lv *lv;
+	unsigned missing_pvs = daemon_request_int(req, LVMPD_PARM_HANDLE_MISSING_PVS, 0);
+
+	lv = pdlv_create(ls, id, vgname, lvname, sysdir, type,
+			 interval, uinterval, pdst);
+	if (!lv) {
+		ERROR(ls, "%s: %s", PD_LOG_PREFIX, "failed to create internal LV data structure.");
+		return NULL;
+	}
+
+	argv = cmdargv_ctr(lv, lv->ls->lvm_binary, abort_polling, missing_pvs);
+	if (!argv) {
+		failure = "failed to construct cmd arguments for lvpoll command";
+		goto bad;
+	}
+	lv->cmdargv = argv;
+
+	envp = cmdenvp_ctr(lv);
+	if (!envp) {
+		failure = "failed to construct cmd environment for lvpoll command";
+		goto bad;
+	}
+	lv->cmdenvp = envp;
+
+	return lv;
+
+bad:
+	pdlv_destroy(lv);
+	ERROR(ls, "%s: %s", PD_LOG_PREFIX, failure);
+	return NULL;
+}
+
+/*
+ * Start fork_and_poll() for pdlv in a new detached thread; the thread id
+ * is stored in pdlv->tid.
+ * Returns 1 when the thread was created, 0 when the thread attributes
+ * could not be prepared or pthread_create() failed.
+ */
+static int spawn_detached_thread(struct lvmpolld_lv *pdlv)
+{
+	int r;
+	pthread_attr_t attr;
+
+	/* an attribute object that failed to initialise must not be used */
+	if (pthread_attr_init(&attr))
+		return 0;
+
+	/* never create the thread joinable by accident: nobody would join it */
+	r = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+	if (!r)
+		r = pthread_create(&pdlv->tid, &attr, fork_and_poll, (void *)pdlv);
+
+	pthread_attr_destroy(&attr);
+
+	return !r;
+}
+
+/*
+ * Handle a request to start polling an LV with the given operation type.
+ *
+ * After validating the request parameters the matching store (abort or
+ * poll) is locked and searched for the LV's id (sysdir + lvid):
+ *  - an entry whose polling already finished is dropped first,
+ *  - an entry still being polled must have the same type; its
+ *    init_rq_count is incremented,
+ *  - otherwise a new pdlv is constructed, inserted into the store and a
+ *    detached monitoring thread is spawned for it.
+ * Replies LVMPD_RESP_OK on success, LVMPD_RESP_EINVAL or LVMPD_RESP_FAILED
+ * with a reason otherwise.
+ */
+static response poll_init(client_handle h, struct lvmpolld_state *ls, request req, enum poll_type type)
+{
+	char *id;
+	struct lvmpolld_lv *pdlv;
+	struct lvmpolld_store *pdst;
+	unsigned uinterval;
+
+	const char *interval = daemon_request_str(req, LVMPD_PARM_INTERVAL, NULL);
+	const char *lvid = daemon_request_str(req, LVMPD_PARM_LVID, NULL);
+	const char *lvname = daemon_request_str(req, LVMPD_PARM_LVNAME, NULL);
+	const char *vgname = daemon_request_str(req, LVMPD_PARM_VGNAME, NULL);
+	const char *sysdir = daemon_request_str(req, LVMPD_PARM_SYSDIR, NULL);
+	unsigned abort_polling = daemon_request_int(req, LVMPD_PARM_ABORT, 0);
+
+	assert(type < POLL_TYPE_MAX);
+
+	if (abort_polling && type != PVMOVE)
+		return reply(LVMPD_RESP_EINVAL, REASON_ILLEGAL_ABORT_REQUEST);
+
+	/* the interval must be present, contain no '-' and start with an unsigned number */
+	if (!interval || strpbrk(interval, "-") || sscanf(interval, "%u", &uinterval) != 1)
+		return reply(LVMPD_RESP_EINVAL, REASON_INVALID_INTERVAL);
+
+	if (!lvname)
+		return reply(LVMPD_RESP_FAILED, REASON_MISSING_LVNAME);
+
+	if (!lvid)
+		return reply(LVMPD_RESP_FAILED, REASON_MISSING_LVID);
+
+	if (!vgname)
+		return reply(LVMPD_RESP_FAILED, REASON_MISSING_VGNAME);
+
+	id = construct_id(sysdir, lvid);
+	if (!id) {
+		ERROR(ls, "%s: %s", PD_LOG_PREFIX, "poll_init request failed to construct ID.");
+		return reply(LVMPD_RESP_FAILED, REASON_ENOMEM);
+	}
+
+	DEBUGLOG(ls, "%s: %s=%s", PD_LOG_PREFIX, "ID", id);
+
+	pdst = abort_polling ? ls->id_to_pdlv_abort : ls->id_to_pdlv_poll;
+
+	pdst_lock(pdst);
+
+	pdlv = pdst_locked_lookup(pdst, id);
+	if (pdlv && pdlv_get_polling_finished(pdlv)) {
+		WARN(ls, "%s: %s %s", PD_LOG_PREFIX, "Force removal of uncollected info for LV",
+			 lvid);
+		/*
+		 * lvmpolld has to remove uncollected results in this case.
+		 * otherwise it would have to refuse request for new polling
+		 * lv with same id.
+		 */
+		pdst_locked_remove(pdst, id);
+		pdlv_destroy(pdlv);
+		pdlv = NULL;
+	}
+
+	if (pdlv) {
+		if (!pdlv_is_type(pdlv, type)) {
+			pdst_unlock(pdst);
+			ERROR(ls, "%s: %s '%s': expected: %s, requested: %s",
+			      PD_LOG_PREFIX, "poll operation type mismatch on LV identified by",
+			      id,
+			      polling_op(pdlv_get_type(pdlv)), polling_op(type));
+			dm_free(id);
+			return reply(LVMPD_RESP_EINVAL,
+				     REASON_DIFFERENT_OPERATION_IN_PROGRESS);
+		}
+		pdlv->init_rq_count++; /* safe. protected by store lock */
+	} else {
+		/* the last argument (uinterval of the new pdlv) is twice the requested interval */
+		pdlv = construct_pdlv(req, ls, pdst, interval, id, vgname,
+				      lvname, sysdir, type, abort_polling, 2 * uinterval);
+		if (!pdlv) {
+			pdst_unlock(pdst);
+			dm_free(id);
+			return reply(LVMPD_RESP_FAILED, REASON_ENOMEM);
+		}
+		if (!pdst_locked_insert(pdst, id, pdlv)) {
+			pdlv_destroy(pdlv);
+			pdst_unlock(pdst);
+			ERROR(ls, "%s: %s", PD_LOG_PREFIX, "couldn't store internal LV data structure");
+			dm_free(id);
+			return reply(LVMPD_RESP_FAILED, REASON_ENOMEM);
+		}
+		if (!spawn_detached_thread(pdlv)) {
+			ERROR(ls, "%s: %s", PD_LOG_PREFIX, "failed to spawn detached monitoring thread");
+			pdst_locked_remove(pdst, id);
+			pdlv_destroy(pdlv);
+			pdst_unlock(pdst);
+			dm_free(id);
+			return reply(LVMPD_RESP_FAILED, REASON_ENOMEM);
+		}
+
+		/* the new thread takes the store lock before its pdst_locked_dec(), so this inc comes first */
+		pdst_locked_inc(pdst);
+		if (ls->idle)
+			ls->idle->is_idle = 0;
+	}
+
+	pdst_unlock(pdst);
+
+	dm_free(id);
+
+	return daemon_reply_simple(LVMPD_RESP_OK, NULL);
+}
+
+/*
+ * Build the reply to a dump request: with the global lock held, the
+ * contents of the poll store are written as a "poll { ... }" section,
+ * followed by the contents of the abort store as an "abort { ... }" section.
+ */
+static response dump_state(client_handle h, struct lvmpolld_state *ls, request r)
+{
+	response res = { 0 };
+	struct buffer *b = &res.buffer;
+
+	buffer_init(b);
+
+	_lvmpolld_global_lock(ls);
+
+	buffer_append(b, "# Registered polling operations\n\n");
+	buffer_append(b, "poll {\n");
+	pdst_locked_dump(ls->id_to_pdlv_poll, b);
+	buffer_append(b, "}\n\n");
+
+	buffer_append(b, "# Registered abort operations\n\n");
+	buffer_append(b, "abort {\n");
+	pdst_locked_dump(ls->id_to_pdlv_abort, b);
+	buffer_append(b, "}");
+
+	_lvmpolld_global_unlock(ls);
+
+	return res;
+}
+
+/*
+ * Daemon request handler: dispatch on the "request" field.  The four
+ * polling requests start (or join) polling of the given type, the progress
+ * and dump requests are served directly, anything else is rejected with
+ * LVMPD_RESP_EINVAL.
+ */
+static response _handler(struct daemon_state s, client_handle h, request r)
+{
+	struct lvmpolld_state *ls = s.private;
+	const char *name = daemon_request_str(r, "request", "NONE");
+
+	if (!strcmp(name, LVMPD_REQ_PVMOVE))
+		return poll_init(h, ls, r, PVMOVE);
+
+	if (!strcmp(name, LVMPD_REQ_CONVERT))
+		return poll_init(h, ls, r, CONVERT);
+
+	if (!strcmp(name, LVMPD_REQ_MERGE))
+		return poll_init(h, ls, r, MERGE);
+
+	if (!strcmp(name, LVMPD_REQ_MERGE_THIN))
+		return poll_init(h, ls, r, MERGE_THIN);
+
+	if (!strcmp(name, LVMPD_REQ_PROGRESS))
+		return progress_info(h, ls, r);
+
+	if (!strcmp(name, LVMPD_REQ_DUMP))
+		return dump_state(h, ls, r);
+
+	return reply(LVMPD_RESP_EINVAL, REASON_REQ_NOT_IMPLEMENTED);
+}
+
+/*
+ * Parse the --timeout argument: a non-negative decimal number smaller than
+ * UINT_MAX.  On success the value is stored in *max_timeouts and 1 is
+ * returned; on any error 0 is returned and *max_timeouts is left untouched.
+ */
+static int process_timeout_arg(const char *str, unsigned *max_timeouts)
+{
+	char *endptr;
+	unsigned long l;
+
+	/*
+	 * strtoul() accepts a leading '-' and returns the negated value, which
+	 * passes the range check where unsigned long is as wide as unsigned.
+	 */
+	if (strchr(str, '-'))
+		return 0;
+
+	errno = 0;
+	l = strtoul(str, &endptr, 10);
+	/* endptr == str: no digits at all (e.g. an empty argument) */
+	if (errno || endptr == str || *endptr || l >= UINT_MAX)
+		return 0;
+
+	*max_timeouts = (unsigned) l;
+
+	return 1;
+}
+
+/* Client functionality */
+typedef int (*action_fn_t) (void *args);
+
+/* Context handed to _log_line(). */
+struct log_line_baton {
+	const char *prefix; /* printed in front of every line */
+};
+
+daemon_handle _lvmpolld = { .error = 0 };
+
+/*
+ * Open a client connection to lvmpolld on the given socket path; a NULL
+ * path selects the default socket under DEFAULT_RUN_DIR.
+ */
+static daemon_handle _lvmpolld_open(const char *socket)
+{
+	daemon_info info = {
+		.path = "lvmpolld",
+		.protocol = LVMPOLLD_PROTOCOL,
+		.protocol_version = LVMPOLLD_PROTOCOL_VERSION
+	};
+
+	info.socket = socket ? socket : DEFAULT_RUN_DIR "/lvmpolld.socket";
+
+	return daemon_open(info);
+}
+
+/* Print one line on stdout, preceded by the prefix stored in the baton. */
+static void _log_line(const char *line, void *baton)
+{
+	const struct log_line_baton *ctx = baton;
+
+	printf("%s%s\n", ctx->prefix, line);
+}
+
+/*
+ * Print msg on stdout line by line, each line preceded by prefix.
+ * Works on a private copy of msg, which is split at the newlines.
+ * Returns 1 on success, 0 when the copy cannot be allocated.
+ */
+static int printout_raw_response(const char *prefix, const char *msg)
+{
+	struct log_line_baton b = { .prefix = prefix };
+	char *copy = dm_strdup(msg);
+	char *line, *nl;
+
+	if (!copy)
+		return 0;
+
+	for (line = copy; line; line = nl ? nl + 1 : NULL) {
+		nl = strchr(line, '\n');
+		if (nl)
+			*nl = '\0';
+		_log_line(line, &b);
+	}
+
+	dm_free(copy);
+
+	return 1;
+}
+
+/* place all action implementations below */
+
+/*
+ * Client action for --dump: send a dump request over the _lvmpolld
+ * connection and print the raw reply on stdout, indented by two spaces.
+ * Returns 1 on success, 0 on failure (a message is printed on stderr).
+ */
+static int action_dump(void *args __attribute__((unused)))
+{
+	daemon_request req;
+	daemon_reply repl;
+	int r = 0;
+
+	req = daemon_request_make(LVMPD_REQ_DUMP);
+	if (!req.cft) {
+		fprintf(stderr, "Failed to create lvmpolld " LVMPD_REQ_DUMP " request.\n");
+		goto out_req;
+	}
+
+	repl = daemon_send(_lvmpolld, req);
+	if (repl.error) {
+		fprintf(stderr, "Failed to send a request or receive response.\n");
+		goto  out_rep;
+	}
+
+	/*
+	 * This is dumb copy & paste from libdaemon log routines.
+	 */
+	if (!printout_raw_response("  ", repl.buffer.mem)) {
+		fprintf(stderr, "Failed to print out the response.\n");
+		goto  out_rep;
+	}
+
+	r = 1;
+
+out_rep:
+	daemon_reply_destroy(repl);
+out_req:
+	daemon_request_destroy(req);
+
+	return r;
+}
+
+enum action_index {
+	ACTION_DUMP = 0,
+	ACTION_MAX /* keep at the end */
+};
+
+static const action_fn_t actions[ACTION_MAX] = { [ACTION_DUMP] = action_dump };
+
+/* Run the action registered for idx with args; an out-of-range idx yields 0. */
+static int _make_action(enum action_index idx, void *args)
+{
+	if (idx >= ACTION_MAX)
+		return 0;
+
+	return actions[idx](args);
+}
+
+/*
+ * Client mode: connect to lvmpolld on the given socket, perform the
+ * requested action and close the connection.
+ * The return value is used by main() as the process exit status, so
+ * EXIT_SUCCESS is returned when the action succeeded and EXIT_FAILURE when
+ * it failed or the connection could not be established.
+ */
+static int _lvmpolld_client(const char *socket, unsigned action)
+{
+	int r;
+
+	_lvmpolld = _lvmpolld_open(socket);
+
+	if (_lvmpolld.error || _lvmpolld.socket_fd < 0) {
+		fprintf(stderr, "Failed to establish connection with lvmpolld.\n");
+		/* a plain 0 here would make the process exit with success */
+		return EXIT_FAILURE;
+	}
+
+	r = _make_action(action, NULL);
+
+	daemon_close(_lvmpolld);
+
+	return r ? EXIT_SUCCESS : EXIT_FAILURE;
+}
+
+static int action_idx = ACTION_MAX;
+static struct option long_options[] = {
+	/* Have actions always at the beginning of the array. */
+	{"dump",	no_argument,		&action_idx,	ACTION_DUMP }, /* or an option_index ? */
+
+	/* other options */
+	{"binary",	required_argument,	0,		'B' },
+	{"foreground",	no_argument,		0,		'f' },
+	{"help",	no_argument,		0,		'h' },
+	{"log",		required_argument,	0,		'l' },
+	{"pidfile",	required_argument,	0,		'p' },
+	{"socket",	required_argument,	0,		's' },
+	{"timeout",	required_argument,	0,		't' },
+	{"version",	no_argument,		0,		'V' },
+	{0,		0,			0,		0 }
+};
+
+/*
+ * lvmpolld entry point.
+ *
+ * Parses the command line and then runs either as a client (an action such
+ * as --dump was requested) or as the daemon.  Options that only make sense
+ * for the daemon (-B, -f, -l, -p, -t) mark the invocation as "server";
+ * mixing them with a client action is an error.  The pidfile and socket
+ * defaults can be overridden with the LVM_LVMPOLLD_PIDFILE and
+ * LVM_LVMPOLLD_SOCKET environment variables.
+ *
+ * All usage errors print their diagnostics, including the usage text, on
+ * stderr; only --help prints the usage on stdout.
+ */
+int main(int argc, char *argv[])
+{
+	int opt;
+	int option_index = 0;
+	int client = 0, server = 0;
+	unsigned action = ACTION_MAX;
+	struct timeval timeout;
+	daemon_idle di = { .ptimeout = &timeout };
+	struct lvmpolld_state ls = { .log_config = "" };
+	daemon_state s = {
+		.daemon_fini = _fini,
+		.daemon_init = _init,
+		.handler = _handler,
+		.name = "lvmpolld",
+		.pidfile = getenv("LVM_LVMPOLLD_PIDFILE") ?: LVMPOLLD_PIDFILE,
+		.private = &ls,
+		.protocol = LVMPOLLD_PROTOCOL,
+		.protocol_version = LVMPOLLD_PROTOCOL_VERSION,
+		.socket_path = getenv("LVM_LVMPOLLD_SOCKET") ?: LVMPOLLD_SOCKET,
+	};
+
+	while ((opt = getopt_long(argc, argv, "fhVl:p:s:B:t:", long_options, &option_index)) != -1) {
+		switch (opt) {
+		case 0 :
+			/* a long option with a flag pointer, i.e. an action: getopt_long() stored its index in action_idx */
+			if (action < ACTION_MAX) {
+				fprintf(stderr, "Can't perform more actions. Action already requested: %s\n",
+					long_options[action].name);
+				_usage(argv[0], stderr);
+				exit(EXIT_FAILURE);
+			}
+			action = action_idx;
+			client = 1;
+			break;
+		case '?':
+			_usage(argv[0], stderr);
+			exit(EXIT_FAILURE);
+		case 'B': /* --binary */
+			ls.lvm_binary = optarg;
+			server = 1;
+			break;
+		case 'V': /* --version */
+			printf("lvmpolld version: " LVM_VERSION "\n");
+			exit(EXIT_SUCCESS);
+		case 'f': /* --foreground */
+			s.foreground = 1;
+			server = 1;
+			break;
+		case 'h': /* --help */
+			_usage(argv[0], stdout);
+			exit(EXIT_SUCCESS);
+		case 'l': /* --log */
+			ls.log_config = optarg;
+			server = 1;
+			break;
+		case 'p': /* --pidfile */
+			s.pidfile = optarg;
+			server = 1;
+			break;
+		case 's': /* --socket */
+			/* valid for both client and server */
+			s.socket_path = optarg;
+			break;
+		case 't': /* --timeout in seconds */
+			if (!process_timeout_arg(optarg, &di.max_timeouts)) {
+				fprintf(stderr, "Invalid value of timeout parameter.\n");
+				exit(EXIT_FAILURE);
+			}
+			/* 0 equals to wait indefinitely */
+			if (di.max_timeouts)
+				s.idle = ls.idle = &di;
+			server = 1;
+			break;
+		}
+	}
+
+	if (client && server) {
+		fprintf(stderr, "Invalid combination of client and server parameters.\n\n");
+		/* this is an error path: keep the usage on stderr like the other ones */
+		_usage(argv[0], stderr);
+		exit(EXIT_FAILURE);
+	}
+
+	if (client)
+		return _lvmpolld_client(s.socket_path, action);
+
+	/* Server */
+	daemon_start(s);
+
+	return EXIT_SUCCESS;
+}
--- a/daemons/lvmpolld/lvmpolld-data-utils.c
+++ b/daemons/lvmpolld/lvmpolld-data-utils.c
@@ -0,0 +1,391 @@
+/*
+ * Copyright (C) 2014-2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "lvmpolld-common.h"
+
+#include "config-util.h"
+
+#include <fcntl.h>
+#include <signal.h>
+
+/*
+ * Build a newly allocated "vgname/lvname" string.
+ *
+ * Returns NULL when the allocation or the formatting fails. The result
+ * is dm_malloc()ed, so it has to be released with dm_free().
+ */
+static char *_construct_full_lvname(const char *vgname, const char *lvname)
+{
+	/* both names, the '/' separator and the terminating \0 */
+	size_t len = strlen(vgname) + strlen(lvname) + 2;
+	char *full = (char *) dm_malloc(len * sizeof(char));
+
+	if (full && dm_snprintf(full, len, "%s/%s", vgname, lvname) < 0) {
+		dm_free(full);
+		return NULL;
+	}
+
+	return full;
+}
+
+/*
+ * Build the LVM_SYSTEM_DIR environment assignment for sysdir.
+ *
+ * With a sysdir the result is a newly allocated "LVM_SYSTEM_DIR=<sysdir>"
+ * string; without one it is an allocated empty string (a single NULL
+ * byte). Returns NULL when the allocation or the formatting fails.
+ */
+static char *_construct_lvm_system_dir_env(const char *sysdir)
+{
+	char *env;
+	size_t l = 1; /* just the terminating NULL byte */
+
+	if (sysdir)
+		l = strlen(sysdir) + 16; /* "LVM_SYSTEM_DIR=" (15 chars) + path + \0 */
+
+	if (!(env = (char *) dm_malloc(l * sizeof(char))))
+		return NULL;
+
+	if (!sysdir) {
+		*env = '\0';
+		return env;
+	}
+
+	if (dm_snprintf(env, l, "LVM_SYSTEM_DIR=%s", sysdir) < 0) {
+		dm_free(env);
+		return NULL;
+	}
+
+	return env;
+}
+
+/*
+ * Return the part of lvmpolld_id that follows the sysdir prefix.
+ *
+ * The result points into lvmpolld_id; it is not a copy. A NULL id yields
+ * NULL and a NULL sysdir yields the id unchanged. Assumes the id really
+ * starts with sysdir (the layout construct_id() produces); nothing is
+ * verified here.
+ */
+static const char *_get_lvid(const char *lvmpolld_id, const char *sysdir)
+{
+	if (!lvmpolld_id)
+		return NULL;
+
+	if (!sysdir)
+		return lvmpolld_id;
+
+	return lvmpolld_id + strlen(sysdir);
+}
+
+/*
+ * Build an lvmpolld id: sysdir (when given) immediately followed by uuid.
+ *
+ * Returns a newly allocated string, or NULL when the allocation or the
+ * formatting fails. Release the result with dm_free().
+ */
+char *construct_id(const char *sysdir, const char *uuid)
+{
+	const char *prefix = sysdir ?: "";
+	size_t len = strlen(uuid) + strlen(prefix) + 1;
+	char *id = (char *) dm_malloc(len * sizeof(char));
+
+	if (!id)
+		return NULL;
+
+	if (dm_snprintf(id, len, "%s%s", prefix, uuid) < 0) {
+		dm_free(id);
+		return NULL;
+	}
+
+	return id;
+}
+
+/*
+ * Allocate and initialise the bookkeeping structure for one polled LV.
+ *
+ * id and sinterval are duplicated, vgname and lvname are combined into a
+ * new "vgname/lvname" string and sysdir is turned into an
+ * "LVM_SYSTEM_DIR=..." string, so the new object keeps no reference to the
+ * caller's strings. pdtimeout is raised to MIN_POLLING_TIMEOUT when a
+ * smaller value is passed.
+ *
+ * Returns the new object, or NULL when an allocation or the mutex
+ * initialisation fails; everything allocated up to that point is released.
+ */
+struct lvmpolld_lv *pdlv_create(struct lvmpolld_state *ls, const char *id,
+			   const char *vgname, const char *lvname,
+			   const char *sysdir, enum poll_type type,
+			   const char *sinterval, unsigned pdtimeout,
+			   struct lvmpolld_store *pdst)
+{
+	char *lvmpolld_id = dm_strdup(id), /* copy */
+	     *full_lvname = _construct_full_lvname(vgname, lvname), /* copy */
+	     *lvm_system_dir_env = _construct_lvm_system_dir_env(sysdir); /* copy */
+
+	/*
+	 * struct lvmpolld_lv has const members, so the object is composed in
+	 * a temporary and copied over the allocated memory with memcpy().
+	 * Members not named in the initializer start out zeroed.
+	 */
+	struct lvmpolld_lv tmp = {
+		.ls = ls,
+		.type = type,
+		.lvmpolld_id = lvmpolld_id,
+		.lvid = _get_lvid(lvmpolld_id, sysdir), /* points into lvmpolld_id, no separate allocation */
+		.lvname = full_lvname,
+		.lvm_system_dir_env = lvm_system_dir_env,
+		.sinterval = dm_strdup(sinterval), /* copy */
+		.pdtimeout = pdtimeout < MIN_POLLING_TIMEOUT ? MIN_POLLING_TIMEOUT : pdtimeout,
+		.cmd_state = { .retcode = -1, .signal = 0 },
+		.pdst = pdst,
+		.init_rq_count = 1
+	}, *pdlv = (struct lvmpolld_lv *) dm_malloc(sizeof(struct lvmpolld_lv));
+
+	/* lvid is NULL exactly when the lvmpolld_id copy failed */
+	if (!pdlv || !tmp.lvid || !tmp.lvname || !tmp.lvm_system_dir_env || !tmp.sinterval)
+		goto err;
+
+	memcpy(pdlv, &tmp, sizeof(*pdlv));
+
+	if (pthread_mutex_init(&pdlv->lock, NULL))
+		goto err;
+
+	return pdlv;
+
+err:
+	/* some of these may be NULL here; this relies on dm_free() accepting NULL */
+	dm_free((void *)full_lvname);
+	dm_free((void *)lvmpolld_id);
+	dm_free((void *)lvm_system_dir_env);
+	dm_free((void *)tmp.sinterval);
+	dm_free((void *)pdlv);
+
+	return NULL;
+}
+
+/*
+ * Release a pdlv together with every string it owns.
+ *
+ * lvid is not freed separately because it points into lvmpolld_id.
+ * For cmdargv and cmdenvp only the arrays themselves are freed here.
+ */
+void pdlv_destroy(struct lvmpolld_lv *pdlv)
+{
+	pthread_mutex_destroy(&pdlv->lock);
+
+	/* command line and environment arrays */
+	dm_free((void *)pdlv->cmdenvp);
+	dm_free((void *)pdlv->cmdargv);
+
+	/* strings copied in pdlv_create() */
+	dm_free((void *)pdlv->lvm_system_dir_env);
+	dm_free((void *)pdlv->sinterval);
+	dm_free((void *)pdlv->lvname);
+	dm_free((void *)pdlv->lvmpolld_id);
+
+	dm_free((void *)pdlv);
+}
+
+/* Read the polling_finished flag while holding the pdlv lock. */
+unsigned pdlv_get_polling_finished(struct lvmpolld_lv *pdlv)
+{
+	unsigned ret;
+
+	pdlv_lock(pdlv);
+	ret = pdlv->polling_finished;
+	pdlv_unlock(pdlv);
+
+	return ret;
+}
+
+/*
+ * Return a by-value snapshot of error, polling_finished and cmd_state.
+ * All three are read inside one pdlv lock section, so they are mutually
+ * consistent.
+ */
+struct lvmpolld_lv_state pdlv_get_status(struct lvmpolld_lv *pdlv)
+{
+	struct lvmpolld_lv_state r;
+
+	pdlv_lock(pdlv);
+	r.error = pdlv_locked_error(pdlv);
+	r.polling_finished = pdlv_locked_polling_finished(pdlv);
+	r.cmd_state = pdlv_locked_cmd_state(pdlv);
+	pdlv_unlock(pdlv);
+
+	return r;
+}
+
+/* Copy *cmd_state into the pdlv while holding the pdlv lock. */
+void pdlv_set_cmd_state(struct lvmpolld_lv *pdlv, const struct lvmpolld_cmd_stat *cmd_state)
+{
+	pdlv_lock(pdlv);
+	pdlv->cmd_state = *cmd_state;
+	pdlv_unlock(pdlv);
+}
+
+/*
+ * Store the error flag while holding the pdlv lock.
+ * The field is a 1-bit bitfield, so only the lowest bit of error is kept.
+ */
+void pdlv_set_error(struct lvmpolld_lv *pdlv, unsigned error)
+{
+	pdlv_lock(pdlv);
+	pdlv->error = error;
+	pdlv_unlock(pdlv);
+}
+
+/*
+ * Store the polling_finished flag while holding the pdlv lock.
+ * The field is a 1-bit bitfield, so only the lowest bit of finished is kept.
+ */
+void pdlv_set_polling_finished(struct lvmpolld_lv *pdlv, unsigned finished)
+{
+	pdlv_lock(pdlv);
+	pdlv->polling_finished = finished;
+	pdlv_unlock(pdlv);
+}
+
+/*
+ * Allocate an empty store: a hash table, its mutex and a zeroed counter
+ * of active pollings. The name pointer is stored as-is, not copied.
+ *
+ * Returns NULL when any step fails, with the earlier steps undone.
+ */
+struct lvmpolld_store *pdst_init(const char *name)
+{
+	struct lvmpolld_store *st;
+
+	if (!(st = (struct lvmpolld_store *) dm_malloc(sizeof(struct lvmpolld_store))))
+		return NULL;
+
+	if (!(st->store = dm_hash_create(32))) {
+		dm_free(st);
+		return NULL;
+	}
+
+	if (pthread_mutex_init(&st->lock, NULL)) {
+		dm_hash_destroy(st->store);
+		dm_free(st);
+		return NULL;
+	}
+
+	st->active_polling_count = 0;
+	st->name = name;
+
+	return st;
+}
+
+/*
+ * Tear down a store created by pdst_init(); a NULL store is ignored.
+ * Only the hash table, the mutex and the store itself are released here.
+ */
+void pdst_destroy(struct lvmpolld_store *pdst)
+{
+	if (pdst) {
+		dm_hash_destroy(pdst->store);
+		pthread_mutex_destroy(&pdst->lock);
+		dm_free(pdst);
+	}
+}
+
+/*
+ * Take the lock of every pdlv stored in pdst, in hash iteration order.
+ * NOTE(review): the "_locked_" prefix suggests the caller already holds
+ * the store lock -- confirm against the callers.
+ */
+void pdst_locked_lock_all_pdlvs(const struct lvmpolld_store *pdst)
+{
+	struct dm_hash_node *n;
+
+	dm_hash_iterate(n, pdst->store)
+		pdlv_lock(dm_hash_get_data(pdst->store, n));
+}
+
+/*
+ * Release the lock of every pdlv stored in pdst; the counterpart of
+ * pdst_locked_lock_all_pdlvs().
+ */
+void pdst_locked_unlock_all_pdlvs(const struct lvmpolld_store *pdst)
+{
+	struct dm_hash_node *n;
+
+	dm_hash_iterate(n, pdst->store)
+		pdlv_unlock(dm_hash_get_data(pdst->store, n));
+}
+
+/*
+ * Append a text section describing one pdlv to buff.
+ *
+ * The section is named after lvmpolld_id and holds one key=value line per
+ * field, followed by a nested "lvm_command" section with the state of the
+ * polling command. A line whose formatting fails (e.g. it does not fit
+ * into the 1024 byte scratch buffer) is left out.
+ *
+ * The fields are read without taking the pdlv lock here; the "_locked"
+ * in the name suggests the caller holds it -- NOTE(review): confirm.
+ */
+static void _pdlv_locked_dump(struct buffer *buff, const struct lvmpolld_lv *pdlv)
+{
+	char tmp[1024];
+	const struct lvmpolld_cmd_stat *cmd_state = &pdlv->cmd_state;
+
+	/* pdlv-section { */
+	if (dm_snprintf(tmp, sizeof(tmp), "\t%s {\n", pdlv->lvmpolld_id) > 0)
+		buffer_append(buff, tmp);
+
+	if (dm_snprintf(tmp, sizeof(tmp), "\t\tlvid=\"%s\"\n", pdlv->lvid) > 0)
+		buffer_append(buff, tmp);
+	if (dm_snprintf(tmp, sizeof(tmp), "\t\ttype=\"%s\"\n", polling_op(pdlv->type)) > 0)
+		buffer_append(buff, tmp);
+	if (dm_snprintf(tmp, sizeof(tmp), "\t\tlvname=\"%s\"\n", pdlv->lvname) > 0)
+		buffer_append(buff, tmp);
+	/* pdtimeout is unsigned, so it is printed with %u rather than %d */
+	if (dm_snprintf(tmp, sizeof(tmp), "\t\tlvmpolld_internal_timeout=%u\n", pdlv->pdtimeout) > 0)
+		buffer_append(buff, tmp);
+	if (dm_snprintf(tmp, sizeof(tmp), "\t\tlvm_command_interval=\"%s\"\n", pdlv->sinterval ?: "<undefined>") > 0)
+		buffer_append(buff, tmp);
+	/* an empty env string means no sysdir was given; otherwise skip the "LVM_SYSTEM_DIR=" prefix */
+	if (dm_snprintf(tmp, sizeof(tmp), "\t\tLVM_SYSTEM_DIR=\"%s\"\n",
+			(*pdlv->lvm_system_dir_env ? (pdlv->lvm_system_dir_env + strlen("LVM_SYSTEM_DIR=")) : "<undefined>")) > 0)
+		buffer_append(buff, tmp);
+	if (dm_snprintf(tmp, sizeof(tmp), "\t\tlvm_command_pid=%d\n", pdlv->cmd_pid) > 0)
+		buffer_append(buff, tmp);
+	/* 1-bit unsigned bitfields are promoted to int, so %d is correct for them */
+	if (dm_snprintf(tmp, sizeof(tmp), "\t\tpolling_finished=%d\n", pdlv->polling_finished) > 0)
+		buffer_append(buff, tmp);
+	if (dm_snprintf(tmp, sizeof(tmp), "\t\terror_occured=%d\n", pdlv->error) > 0)
+		buffer_append(buff, tmp);
+	/* init_rq_count is unsigned as well */
+	if (dm_snprintf(tmp, sizeof(tmp), "\t\tinit_requests_count=%u\n", pdlv->init_rq_count) > 0)
+		buffer_append(buff, tmp);
+
+	/* lvm_command-section { */
+	buffer_append(buff, "\t\tlvm_command {\n");
+	/* retcode == -1 with no signal is the initial state set in pdlv_create() */
+	if (cmd_state->retcode == -1 && !cmd_state->signal)
+		buffer_append(buff, "\t\t\tstate=\"" LVMPD_RESP_IN_PROGRESS "\"\n");
+	else {
+		buffer_append(buff, "\t\t\tstate=\"" LVMPD_RESP_FINISHED "\"\n");
+		/* a non-zero signal takes precedence over the return code */
+		if (dm_snprintf(tmp, sizeof(tmp), "\t\t\treason=\"%s\"\n\t\t\tvalue=%d\n",
+				(cmd_state->signal ? LVMPD_REAS_SIGNAL : LVMPD_REAS_RETCODE),
+				(cmd_state->signal ?: cmd_state->retcode)) > 0)
+			buffer_append(buff, tmp);
+	}
+	buffer_append(buff, "\t\t}\n");
+	/* } lvm_command-section */
+
+	buffer_append(buff, "\t}\n");
+	/* } pdlv-section */
+}
+
+/* Append the dump section of every pdlv stored in pdst to buff. */
+void pdst_locked_dump(const struct lvmpolld_store *pdst, struct buffer *buff)
+{
+	struct dm_hash_node *n;
+
+	dm_hash_iterate(n, pdst->store)
+		_pdlv_locked_dump(buff, dm_hash_get_data(pdst->store, n));
+}
+
+/*
+ * Request cancellation (pthread_cancel) of the thread of every pdlv in
+ * the store whose polling_finished flag is not set yet.
+ * The flag is read through the "_locked" accessor, i.e. without taking
+ * the pdlv lock here.
+ */
+void pdst_locked_send_cancel(const struct lvmpolld_store *pdst)
+{
+	struct lvmpolld_lv *pdlv;
+	struct dm_hash_node *n;
+
+	dm_hash_iterate(n, pdst->store) {
+		pdlv = dm_hash_get_data(pdst->store, n);
+		if (!pdlv_locked_polling_finished(pdlv))
+			pthread_cancel(pdlv->tid);
+	}
+}
+
+/*
+ * Destroy every pdlv stored in pdst.
+ * The hash nodes themselves are not removed here, so afterwards the
+ * store still references the freed objects.
+ */
+void pdst_locked_destroy_all_pdlvs(const struct lvmpolld_store *pdst)
+{
+	struct dm_hash_node *n;
+
+	dm_hash_iterate(n, pdst->store)
+		pdlv_destroy(dm_hash_get_data(pdst->store, n));
+}
+
+/*
+ * Allocate the private data of a polling thread.
+ *
+ * Two pipes (outpipe and errpipe) are created and all four ends are
+ * marked close-on-exec. pdlv is attached only after everything else
+ * succeeded. Returns NULL on any failure, with partially created
+ * resources released.
+ */
+struct lvmpolld_thread_data *lvmpolld_thread_data_constructor(struct lvmpolld_lv *pdlv)
+{
+	struct lvmpolld_thread_data *td;
+
+	td = (struct lvmpolld_thread_data *) dm_malloc(sizeof(struct lvmpolld_thread_data));
+	if (!td)
+		return NULL;
+
+	/* start from a state lvmpolld_thread_data_destroy() can unwind */
+	td->pdlv = NULL;
+	td->line = NULL;
+	td->line_size = 0;
+	td->fout = NULL;
+	td->ferr = NULL;
+	td->outpipe[0] = td->outpipe[1] = -1;
+	td->errpipe[0] = td->errpipe[1] = -1;
+
+	/* evaluation stops at the first failing call */
+	if (pipe(td->outpipe) || pipe(td->errpipe) ||
+	    fcntl(td->outpipe[0], F_SETFD, FD_CLOEXEC) ||
+	    fcntl(td->outpipe[1], F_SETFD, FD_CLOEXEC) ||
+	    fcntl(td->errpipe[0], F_SETFD, FD_CLOEXEC) ||
+	    fcntl(td->errpipe[1], F_SETFD, FD_CLOEXEC)) {
+		/* pdlv is still NULL, so the destructor leaves the pdlv alone */
+		lvmpolld_thread_data_destroy(td);
+		return NULL;
+	}
+
+	td->pdlv = pdlv;
+
+	return td;
+}
+
+/*
+ * Release everything held by a polling thread's private data.
+ * A NULL argument is ignored.
+ *
+ * When a pdlv is still attached, the polling is wound up under the store
+ * lock: a command that has not been marked finished is sent SIGTERM, the
+ * pdlv is marked finished and the store's active polling counter is
+ * decremented. After that the getline() buffer, both streams, the
+ * remaining pipe descriptors and the structure itself are released.
+ *
+ * NOTE(review): the void * parameter presumably lets this function be
+ * used as a pthread cleanup handler -- confirm against the caller.
+ */
+void lvmpolld_thread_data_destroy(void *thread_private)
+{
+	struct lvmpolld_thread_data *data = (struct lvmpolld_thread_data *) thread_private;
+	if (!data)
+		return;
+
+	if (data->pdlv) {
+		pdst_lock(data->pdlv->pdst);
+		/*
+		 * FIXME: skip this step if lvmpolld is activated
+		 * 	  by systemd.
+		 *
+		 * cmd_pid starts out as 0 (see pdlv_create()). kill() with
+		 * pid 0 would signal the whole process group of the daemon,
+		 * so only a real child pid is ever signalled.
+		 */
+		if (!pdlv_get_polling_finished(data->pdlv) && data->pdlv->cmd_pid > 0)
+			kill(data->pdlv->cmd_pid, SIGTERM);
+		pdlv_set_polling_finished(data->pdlv, 1);
+		pdst_locked_dec(data->pdlv->pdst);
+		pdst_unlock(data->pdlv->pdst);
+	}
+
+	/* may get reallocated in getline(). dm_free must not be used */
+	free(data->line);
+
+	/*
+	 * fclose() gives up the underlying descriptor even when it reports
+	 * an error, so the read end must never be closed a second time:
+	 * the number may already belong to a descriptor opened elsewhere.
+	 */
+	if (data->fout) {
+		(void) fclose(data->fout);
+		data->outpipe[0] = -1;
+	}
+
+	if (data->ferr) {
+		(void) fclose(data->ferr);
+		data->errpipe[0] = -1;
+	}
+
+	/* close whatever pipe ends are still open */
+	if (data->outpipe[0] >= 0)
+		(void) close(data->outpipe[0]);
+
+	if (data->outpipe[1] >= 0)
+		(void) close(data->outpipe[1]);
+
+	if (data->errpipe[0] >= 0)
+		(void) close(data->errpipe[0]);
+
+	if (data->errpipe[1] >= 0)
+		(void) close(data->errpipe[1]);
+
+	dm_free(data);
+}
--- a/daemons/lvmpolld/lvmpolld-data-utils.h
+++ b/daemons/lvmpolld/lvmpolld-data-utils.h
@@ -0,0 +1,215 @@
+/*
+ * Copyright (C) 2014-2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _LVM_LVMPOLLD_DATA_UTILS_H
+#define _LVM_LVMPOLLD_DATA_UTILS_H
+
+#include <pthread.h>
+
+struct buffer;
+struct lvmpolld_state;
+
+/*
+ * Kind of polling operation a struct lvmpolld_lv stands for.
+ * POLL_TYPE_MAX is presumably a count/limit value rather than a real
+ * type -- confirm against its users.
+ */
+enum poll_type {
+	PVMOVE = 0,
+	CONVERT,
+	MERGE,
+	MERGE_THIN,
+	POLL_TYPE_MAX
+};
+
+/*
+ * Outcome of the lvm command run for a pdlv.
+ * pdlv_create() starts with retcode == -1 and signal == 0, which the dump
+ * code reports as "in progress"; any other combination is reported as
+ * finished, with a non-zero signal taking precedence over retcode.
+ */
+struct lvmpolld_cmd_stat {
+	int retcode;
+	int signal;
+};
+
+/* A collection of struct lvmpolld_lv objects indexed by a string key. */
+struct lvmpolld_store {
+	pthread_mutex_t lock; /* taken by pdst_lock(), released by pdst_unlock() */
+	void *store; /* hash table created with dm_hash_create() */
+	const char *name; /* pointer handed to pdst_init(); not copied */
+	unsigned active_polling_count; /* changed by pdst_locked_inc() and pdst_locked_dec() */
+};
+
+/*
+ * State of one polling operation on a logical volume.
+ * Created by pdlv_create() and released by pdlv_destroy().
+ */
+struct lvmpolld_lv {
+	/*
+	 * accessing following vars doesn't
+	 * require struct lvmpolld_lv lock
+	 */
+	struct lvmpolld_state *const ls;
+	const enum poll_type type;
+	const char *const lvid; /* points into lvmpolld_id, past the sysdir part */
+	const char *const lvmpolld_id;
+	const char *const lvname; /* full vg/lv name */
+	const unsigned pdtimeout; /* in seconds */
+	const char *const sinterval;
+	const char *const lvm_system_dir_env; /* "LVM_SYSTEM_DIR=<dir>" or an empty string */
+	struct lvmpolld_store *const pdst;
+	const char *const *cmdargv;
+	const char *const *cmdenvp;
+
+	/* only used by write */
+	pid_t cmd_pid;
+	pthread_t tid;
+
+	pthread_mutex_t lock;
+
+	/* block of shared variables protected by lock */
+	struct lvmpolld_cmd_stat cmd_state;
+	unsigned init_rq_count; /* for debugging purposes only */
+	unsigned polling_finished:1; /* no more updates */
+	unsigned error:1; /* unrecoverable error occurred in lvmpolld */
+};
+
+typedef void (*lvmpolld_parse_output_fn_t) (struct lvmpolld_lv *pdlv, const char *line);
+
+/* TODO: replace with configuration option */
+#define MIN_POLLING_TIMEOUT 60
+
+/* By-value snapshot of the lock protected pdlv fields; see pdlv_get_status(). */
+struct lvmpolld_lv_state {
+	unsigned error:1;
+	unsigned polling_finished:1;
+	struct lvmpolld_cmd_stat cmd_state;
+};
+
+/*
+ * Private data of one polling thread; set up by
+ * lvmpolld_thread_data_constructor() and released by
+ * lvmpolld_thread_data_destroy().
+ */
+struct lvmpolld_thread_data {
+	char *line; /* getline() buffer: released with free(), never dm_free() */
+	size_t line_size;
+	int outpipe[2]; /* pipe descriptors, -1 when not open */
+	int errpipe[2]; /* pipe descriptors, -1 when not open */
+	FILE *fout; /* stream owning outpipe[0] once opened */
+	FILE *ferr; /* stream owning errpipe[0] once opened */
+	char buf[1024];
+	struct lvmpolld_lv *pdlv; /* NULL until the constructor has fully succeeded */
+};
+
+char *construct_id(const char *sysdir, const char *lvid);
+
+/* LVMPOLLD_LV_T section */
+
+/* only call with appropriate struct lvmpolld_store lock held */
+struct lvmpolld_lv *pdlv_create(struct lvmpolld_state *ls, const char *id,
+			   const char *vgname, const char *lvname,
+			   const char *sysdir, enum poll_type type,
+			   const char *sinterval, unsigned pdtimeout,
+			   struct lvmpolld_store *pdst);
+
+/* only call with appropriate struct lvmpolld_store lock held */
+void pdlv_destroy(struct lvmpolld_lv *pdlv);
+
+/* Acquire the pdlv mutex; the return value of pthread_mutex_lock() is ignored. */
+static inline void pdlv_lock(struct lvmpolld_lv *pdlv)
+{
+	pthread_mutex_lock(&pdlv->lock);
+}
+
+/* Release the pdlv mutex; the return value of pthread_mutex_unlock() is ignored. */
+static inline void pdlv_unlock(struct lvmpolld_lv *pdlv)
+{
+	pthread_mutex_unlock(&pdlv->lock);
+}
+
+/*
+ * no struct lvmpolld_lv lock required section
+ */
+/* Return non-zero when the pdlv polls an operation of the given type. */
+static inline int pdlv_is_type(const struct lvmpolld_lv *pdlv, enum poll_type type)
+{
+	return pdlv->type == type;
+}
+
+/* Return the pdlv timeout in seconds (never below MIN_POLLING_TIMEOUT, see pdlv_create()). */
+static inline unsigned pdlv_get_timeout(const struct lvmpolld_lv *pdlv)
+{
+	return pdlv->pdtimeout;
+}
+
+/* Return the type of operation this pdlv polls. */
+static inline enum poll_type pdlv_get_type(const struct lvmpolld_lv *pdlv)
+{
+	return pdlv->type;
+}
+
+unsigned pdlv_get_polling_finished(struct lvmpolld_lv *pdlv);
+struct lvmpolld_lv_state pdlv_get_status(struct lvmpolld_lv *pdlv);
+void pdlv_set_cmd_state(struct lvmpolld_lv *pdlv, const struct lvmpolld_cmd_stat *cmd_state);
+void pdlv_set_error(struct lvmpolld_lv *pdlv, unsigned error);
+void pdlv_set_polling_finished(struct lvmpolld_lv *pdlv, unsigned finished);
+
+/*
+ * struct lvmpolld_lv lock required section
+ */
+/* Return a copy of cmd_state; no locking is done here. */
+static inline struct lvmpolld_cmd_stat pdlv_locked_cmd_state(const struct lvmpolld_lv *pdlv)
+{
+	return pdlv->cmd_state;
+}
+
+/* Return the polling_finished flag (0 or 1); no locking is done here. */
+static inline int pdlv_locked_polling_finished(const struct lvmpolld_lv *pdlv)
+{
+	return pdlv->polling_finished;
+}
+
+/* Return the error flag (0 or 1); no locking is done here. */
+static inline unsigned pdlv_locked_error(const struct lvmpolld_lv *pdlv)
+{
+	return pdlv->error;
+}
+
+/* struct lvmpolld_store manipulation routines */
+
+struct lvmpolld_store *pdst_init(const char *name);
+void pdst_destroy(struct lvmpolld_store *pdst);
+
+void pdst_locked_dump(const struct lvmpolld_store *pdst, struct buffer *buff);
+void pdst_locked_lock_all_pdlvs(const struct lvmpolld_store *pdst);
+void pdst_locked_unlock_all_pdlvs(const struct lvmpolld_store *pdst);
+void pdst_locked_destroy_all_pdlvs(const struct lvmpolld_store *pdst);
+void pdst_locked_send_cancel(const struct lvmpolld_store *pdst);
+
+/* Acquire the store mutex; the return value of pthread_mutex_lock() is ignored. */
+static inline void pdst_lock(struct lvmpolld_store *pdst)
+{
+	pthread_mutex_lock(&pdst->lock);
+}
+
+/* Release the store mutex; the return value of pthread_mutex_unlock() is ignored. */
+static inline void pdst_unlock(struct lvmpolld_store *pdst)
+{
+	pthread_mutex_unlock(&pdst->lock);
+}
+
+/* Increment the count of active pollings; no locking is done here. */
+static inline void pdst_locked_inc(struct lvmpolld_store *pdst)
+{
+	pdst->active_polling_count++;
+}
+
+/*
+ * Decrement the count of active pollings; no locking is done here.
+ * The counter is unsigned and not checked, so decrementing 0 wraps around.
+ */
+static inline void pdst_locked_dec(struct lvmpolld_store *pdst)
+{
+	pdst->active_polling_count--;
+}
+
+/* Return the count of active pollings; no locking is done here. */
+static inline unsigned pdst_locked_get_active_count(const struct lvmpolld_store *pdst)
+{
+	return pdst->active_polling_count;
+}
+
+/* Store pdlv under key; the dm_hash_insert() result is passed through unchanged. */
+static inline int pdst_locked_insert(struct lvmpolld_store *pdst, const char *key, struct lvmpolld_lv *pdlv)
+{
+	return dm_hash_insert(pdst->store, key, pdlv);
+}
+
+/* Return whatever dm_hash_lookup() finds under key in the store. */
+static inline struct lvmpolld_lv *pdst_locked_lookup(struct lvmpolld_store *pdst, const char *key)
+{
+	return dm_hash_lookup(pdst->store, key);
+}
+
+/* Remove the entry stored under key; the pdlv itself is not destroyed here. */
+static inline void pdst_locked_remove(struct lvmpolld_store *pdst, const char *key)
+{
+	dm_hash_remove(pdst->store, key);
+}
+
+struct lvmpolld_thread_data *lvmpolld_thread_data_constructor(struct lvmpolld_lv *pdlv);
+void lvmpolld_thread_data_destroy(void *thread_private);
+
+#endif /* _LVM_LVMPOLLD_DATA_UTILS_H */
--- a/daemons/lvmpolld/lvmpolld-protocol.h
+++ b/daemons/lvmpolld/lvmpolld-protocol.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2015 Red Hat, Inc.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _LVM_LVMPOLLD_PROTOCOL_H
+#define _LVM_LVMPOLLD_PROTOCOL_H
+
+#include "polling_ops.h"
+
+/* protocol name and version the daemon is set up with */
+#define LVMPOLLD_PROTOCOL "lvmpolld"
+#define LVMPOLLD_PROTOCOL_VERSION 1
+
+/* request names; the polling requests reuse the operation names from polling_ops.h */
+#define LVMPD_REQ_CONVERT	CONVERT_POLL
+#define LVMPD_REQ_DUMP		"dump"
+#define LVMPD_REQ_MERGE		MERGE_POLL
+#define LVMPD_REQ_MERGE_THIN	MERGE_THIN_POLL
+#define LVMPD_REQ_PROGRESS	"progress_info"
+#define LVMPD_REQ_PVMOVE	PVMOVE_POLL
+
+/* parameter names */
+#define LVMPD_PARM_ABORT		"abort"
+#define LVMPD_PARM_HANDLE_MISSING_PVS	"handle_missing_pvs"
+#define LVMPD_PARM_INTERVAL		"interval"
+#define LVMPD_PARM_LVID			"lvid"
+#define LVMPD_PARM_LVNAME		"lvname"
+#define LVMPD_PARM_SYSDIR		"sysdir"
+#define LVMPD_PARM_VALUE		"value" /* either retcode or signal value */
+#define LVMPD_PARM_VGNAME		"vgname"
+
+/* response values */
+#define LVMPD_RESP_FAILED	"failed"
+#define LVMPD_RESP_FINISHED	"finished"
+#define LVMPD_RESP_IN_PROGRESS	"in_progress"
+#define LVMPD_RESP_EINVAL	"invalid"
+#define LVMPD_RESP_NOT_FOUND	"not_found"
+#define LVMPD_RESP_OK		"OK"
+
+/* reason accompanying a "finished" state */
+#define LVMPD_REAS_RETCODE	"retcode" /* lvm cmd ret code */
+#define LVMPD_REAS_SIGNAL	"signal" /* lvm cmd terminating signal */
+
+/*
+ * NOTE(review): these look like exit codes used when setting up (dup) or
+ * executing (exec) the lvm command fails -- confirm against their users.
+ */
+#define LVMPD_RET_DUP_FAILED	100
+#define LVMPD_RET_EXC_FAILED	101
+
+#endif /* _LVM_LVMPOLLD_PROTOCOL_H */
--- a/daemons/lvmpolld/polling_ops.h
+++ b/daemons/lvmpolld/polling_ops.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2014-2015 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _LVM_TOOL_POLLING_OPS_H
+#define _LVM_TOOL_POLLING_OPS_H
+
+/* this file is also part of lvmpolld protocol */
+
+/* names of the polling operations; lvmpolld-protocol.h reuses them as LVMPD_REQ_* values */
+#define PVMOVE_POLL "pvmove"
+#define CONVERT_POLL "convert"
+#define MERGE_POLL "merge"
+#define MERGE_THIN_POLL "merge_thin"
+
+#endif /* _LVM_TOOL_POLLING_OPS_H */
--- a/doc/kernel/cache-policies.txt
+++ b/doc/kernel/cache-policies.txt
@@ -30,28 +30,48 @@ multiqueue

 This policy is the default.

-The multiqueue policy has two sets of 16 queues: one set for entries
-waiting for the cache and another one for those in the cache.
+The multiqueue policy has three sets of 16 queues: one set for entries
+waiting for the cache and another two for those in the cache (a set for
+clean entries and a set for dirty entries).
+
 Cache entries in the queues are aged based on logical time. Entry into
 the cache is based on variable thresholds and queue selection is based
 on hit count on entry. The policy aims to take different cache miss
 costs into account and to adjust to varying load patterns automatically.

 Message and constructor argument pairs are:
-	'sequential_threshold <#nr_sequential_ios>' and
-	'random_threshold <#nr_random_ios>'.
+	'sequential_threshold <#nr_sequential_ios>'
+	'random_threshold <#nr_random_ios>'
+	'read_promote_adjustment <value>'
+	'write_promote_adjustment <value>'
+	'discard_promote_adjustment <value>'

 The sequential threshold indicates the number of contiguous I/Os
-required before a stream is treated as sequential.  The random threshold
+required before a stream is treated as sequential.  Once a stream is
+considered sequential it will bypass the cache.  The random threshold
 is the number of intervening non-contiguous I/Os that must be seen
 before the stream is treated as random again.

 The sequential and random thresholds default to 512 and 4 respectively.

-Large, sequential ios are probably better left on the origin device
-since spindles tend to have good bandwidth. The io_tracker counts
-contiguous I/Os to try to spot when the io is in one of these sequential
-modes.
+Large, sequential I/Os are probably better left on the origin device
+since spindles tend to have good sequential I/O bandwidth.  The
+io_tracker counts contiguous I/Os to try to spot when the I/O is in one
+of these sequential modes.  But there are use-cases for wanting to
+promote sequential blocks to the cache (e.g. fast application startup).
+If sequential threshold is set to 0 the sequential I/O detection is
+disabled and sequential I/O will no longer implicitly bypass the cache.
+Setting the random threshold to 0 does _not_ disable the random I/O
+stream detection.
+
+Internally the mq policy determines a promotion threshold.  If the hit
+count of a block not in the cache goes above this threshold it gets
+promoted to the cache.  The read, write and discard promote adjustment
+tunables allow you to tweak the promotion threshold by adding a small
+value based on the io type.  They default to 4, 8 and 1 respectively.
+If you're trying to quickly warm a new cache device you may wish to
+reduce these to encourage promotion.  Remember to switch them back to
+their defaults after the cache fills though.

 cleaner
 -------
--- a/doc/kernel/cache.txt
+++ b/doc/kernel/cache.txt
@@ -50,14 +50,16 @@ other parameters detailed later):
   which are dirty, and extra hints for use by the policy object.
   This information could be put on the cache device, but having it
   separate allows the volume manager to configure it differently,
-   e.g. as a mirror for extra robustness.
+   e.g. as a mirror for extra robustness.  This metadata device may only
+   be used by a single cache device.

 Fixed block size
 ----------------

 The origin is divided up into blocks of a fixed size.  This block size
 is configurable when you first create the cache.  Typically we've been
-using block sizes of 256k - 1024k.
+using block sizes of 256KB - 1024KB.  The block size must be between 64
+(32KB) and 2097152 (1GB) and a multiple of 64 (32KB).

 Having a fixed block size simplifies the target a lot.  But it is
 something of a compromise.  For instance, a small part of a block may be
@@ -66,10 +68,11 @@ So large block sizes are bad because they waste cache space.  And small
 block sizes are bad because they increase the amount of metadata (both
 in core and on disk).

-Writeback/writethrough
----------------------
+Cache operating modes
+---------------------

-The cache has two modes, writeback and writethrough.
+The cache has three operating modes: writeback, writethrough and
+passthrough.

 If writeback, the default, is selected then a write to a block that is
 cached will go only to the cache and the block will be marked dirty in
@@ -79,15 +82,38 @@ If writethrough is selected then a write to a cached block will not
 complete until it has hit both the origin and cache devices.  Clean
 blocks should remain clean.

+If passthrough is selected, useful when the cache contents are not known
+to be coherent with the origin device, then all reads are served from
+the origin device (all reads miss the cache) and all writes are
+forwarded to the origin device; additionally, write hits cause cache
+block invalidates.  To enable passthrough mode the cache must be clean.
+Passthrough mode allows a cache device to be activated without having to
+worry about coherency.  Coherency that exists is maintained, although
+the cache will gradually cool as writes take place.  If the coherency of
+the cache can later be verified, or established through use of the
+"invalidate_cblocks" message, the cache device can be transitioned to
+writethrough or writeback mode while still warm.  Otherwise, the cache
+contents can be discarded prior to transitioning to the desired
+operating mode.
+
 A simple cleaner policy is provided, which will clean (write back) all
-dirty blocks in a cache.  Useful for decommissioning a cache.
+dirty blocks in a cache.  Useful for decommissioning a cache or when
+shrinking a cache.  Shrinking the cache's fast device requires all cache
+blocks, in the area of the cache being removed, to be clean.  If the
+area being removed from the cache still contains dirty blocks the resize
+will fail.  Care must be taken to never reduce the volume used for the
+cache's fast device until the cache is clean.  This is of particular
+importance if writeback mode is used.  Writethrough and passthrough
+modes already maintain a clean cache.  Future support to partially clean
+the cache, above a specified threshold, will allow for keeping the cache
+warm and in writeback mode during resize.

 Migration throttling
 --------------------

 Migrating data between the origin and cache device uses bandwidth.
 The user can set a throttle to prevent more than a certain amount of
-migration occuring at any one time.  Currently we're not taking any
+migration occurring at any one time.  Currently we're not taking any
 account of normal io traffic going to the devices.  More work needs
 doing here to avoid migrating during those peak io moments.

@@ -98,12 +124,11 @@ the default being 204800 sectors (or 100MB).
 Updating on-disk metadata
 -------------------------

-On-disk metadata is committed every time a REQ_SYNC or REQ_FUA bio is
-written.  If no such requests are made then commits will occur every
-second.  This means the cache behaves like a physical disk that has a
-write cache (the same is true of the thin-provisioning target).  If
-power is lost you may lose some recent writes.  The metadata should
-always be consistent in spite of any crash.
+On-disk metadata is committed every time a FLUSH or FUA bio is written.
+If no such requests are made then commits will occur every second.  This
+means the cache behaves like a physical disk that has a volatile write
+cache.  If power is lost you may lose some recent writes.  The metadata
+should always be consistent in spite of any crash.

 The 'dirty' state for a cache block changes far too frequently for us
 to keep updating it on the fly.  So we treat it as a hint.  In normal
@@ -159,7 +184,7 @@ Constructor
 block size      : cache unit size in sectors

 #feature args   : number of feature arguments passed
- feature args    : writethrough.  (The default is writeback.)
+ feature args    : writethrough or passthrough (The default is writeback.)

 policy          : the replacement policy to use
 #policy args    : an even number of arguments corresponding to
@@ -175,6 +200,13 @@ Optional feature arguments are:
 		   back cache block contents later for performance reasons,
 		   so they may differ from the corresponding origin blocks.

+   passthrough	 : a degraded mode useful for various cache coherency
+		   situations (e.g., rolling back snapshots of
+		   underlying storage).	 Reads and writes always go to
+		   the origin.	If a write goes to a cached origin
+		   block, then the cache block is invalidated.
+		   To enable passthrough mode the cache must be clean.
+
 A policy called 'default' is always registered.  This is an alias for
 the policy we currently think is giving best all round performance.

@@ -184,36 +216,43 @@ the characteristics of a specific policy, always request it by name.
 Status
 ------

-<#used metadata blocks>/<#total metadata blocks> <#read hits> <#read misses>
-<#write hits> <#write misses> <#demotions> <#promotions> <#blocks in cache>
-<#dirty> <#features> <features>* <#core args> <core args>* <#policy args>
-<policy args>*
+<metadata block size> <#used metadata blocks>/<#total metadata blocks>
+<cache block size> <#used cache blocks>/<#total cache blocks>
+<#read hits> <#read misses> <#write hits> <#write misses>
+<#demotions> <#promotions> <#dirty> <#features> <features>*
+<#core args> <core args>* <policy name> <#policy args> <policy args>*

-#used metadata blocks    : Number of metadata blocks used
-#total metadata blocks   : Total number of metadata blocks
-#read hits               : Number of times a READ bio has been mapped
+metadata block size	 : Fixed block size for each metadata block in
+			     sectors
+#used metadata blocks	 : Number of metadata blocks used
+#total metadata blocks	 : Total number of metadata blocks
+cache block size	 : Configurable block size for the cache device
+			     in sectors
+#used cache blocks	 : Number of blocks resident in the cache
+#total cache blocks	 : Total number of cache blocks
+#read hits		 : Number of times a READ bio has been mapped
 			     to the cache
-#read misses             : Number of times a READ bio has been mapped
+#read misses		 : Number of times a READ bio has been mapped
 			     to the origin
-#write hits              : Number of times a WRITE bio has been mapped
+#write hits		 : Number of times a WRITE bio has been mapped
 			     to the cache
-#write misses            : Number of times a WRITE bio has been
+#write misses		 : Number of times a WRITE bio has been
 			     mapped to the origin
-#demotions               : Number of times a block has been removed
+#demotions		 : Number of times a block has been removed
 			     from the cache
-#promotions              : Number of times a block has been moved to
+#promotions		 : Number of times a block has been moved to
 			     the cache
-#blocks in cache         : Number of blocks resident in the cache
-#dirty                   : Number of blocks in the cache that differ
+#dirty			 : Number of blocks in the cache that differ
 			     from the origin
-#feature args            : Number of feature args to follow
-feature args             : 'writethrough' (optional)
-#core args               : Number of core arguments (must be even)
-core args                : Key/value pairs for tuning the core
+#feature args		 : Number of feature args to follow
+feature args		 : 'writethrough' (optional)
+#core args		 : Number of core arguments (must be even)
+core args		 : Key/value pairs for tuning the core
 			     e.g. migration_threshold
-#policy args             : Number of policy arguments to follow (must be even)
-policy args              : Key/value pairs
-			     e.g. 'sequential_threshold 1024
+policy name		 : Name of the policy
+#policy args		 : Number of policy arguments to follow (must be even)
+policy args		 : Key/value pairs
+			     e.g. sequential_threshold

 Messages
 --------
@@ -229,12 +268,28 @@ The message format is:
 E.g.
   dmsetup message my_cache 0 sequential_threshold 1024

+
+Invalidation is removing an entry from the cache without writing it
+back.  Cache blocks can be invalidated via the invalidate_cblocks
+message, which takes an arbitrary number of cblock ranges.  Each cblock
+range's end value is "one past the end", meaning 5-10 expresses a range
+of values from 5 to 9.  Each cblock must be expressed as a decimal
+value; in the future a variant message that takes cblock ranges
+expressed in hexadecimal may be needed to better support efficient
+invalidation of larger caches.  The cache must be in passthrough mode
+when invalidate_cblocks is used.
+
+   invalidate_cblocks [<cblock>|<cblock begin>-<cblock end>]*
+
+E.g.
+   dmsetup message my_cache 0 invalidate_cblocks 2345 3456-4567 5678-6789
+
 Examples
 ========

 The test suite can be found here:

-https://github.com/jthornber/thinp-test-suite
+https://github.com/jthornber/device-mapper-test-suite

 dmsetup create my_cache --table '0 41943040 cache /dev/mapper/metadata \
 	/dev/mapper/ssd /dev/mapper/origin 512 1 writeback default 0'
--- a/doc/kernel/crypt.txt
+++ b/doc/kernel/crypt.txt
@@ -4,12 +4,15 @@ dm-crypt
 Device-Mapper's "crypt" target provides transparent encryption of block devices
 using the kernel crypto API.

+For a more detailed description of supported parameters see:
+https://gitlab.com/cryptsetup/cryptsetup/wikis/DMCrypt
+
 Parameters: <cipher> <key> <iv_offset> <device path> \
 	      <offset> [<#opt_params> <opt_params>]

 <cipher>
    Encryption cipher and an optional IV generation mode.
-    (In format cipher[:keycount]-chainmode-ivopts:ivmode).
+    (In format cipher[:keycount]-chainmode-ivmode[:ivopts]).
    Examples:
       des
       aes-cbc-essiv:sha256
@@ -19,7 +22,11 @@ Parameters: <cipher> <key> <iv_offset> <device path> \

 <key>
    Key used for encryption. It is encoded as a hexadecimal number.
-    You can only use key sizes that are valid for the selected cipher.
+    You can only use key sizes that are valid for the selected cipher
+    in combination with the selected iv mode.
+    Note that for some iv modes the key string can contain additional
+    keys (for example IV seed) so the key contains more parts concatenated
+    into a single string.

 <keycount>
    Multi-key compatibility mode. You can define <keycount> keys and
@@ -44,7 +51,7 @@ Parameters: <cipher> <key> <iv_offset> <device path> \
    Otherwise #opt_params is the number of following arguments.

    Example of optional parameters section:
-        1 allow_discards
+        3 allow_discards same_cpu_crypt submit_from_crypt_cpus

 allow_discards
    Block discard requests (a.k.a. TRIM) are passed through the crypt device.
@@ -56,11 +63,24 @@ allow_discards
    used space etc.) if the discarded blocks can be located easily on the
    device later.

+same_cpu_crypt
+    Perform encryption using the same cpu that IO was submitted on.
+    The default is to use an unbound workqueue so that encryption work
+    is automatically balanced between available CPUs.
+
+submit_from_crypt_cpus
+    Disable offloading writes to a separate thread after encryption.
+    There are some situations where offloading write bios from the
+    encryption threads to a single thread degrades performance
+    significantly.  The default is to offload write bios to the same
+    thread because it benefits CFQ to have writes submitted using the
+    same context.
+
 Example scripts
 ===============
 LUKS (Linux Unified Key Setup) is now the preferred way to set up disk
 encryption with dm-crypt using the 'cryptsetup' utility, see
-http://code.google.com/p/cryptsetup/
+https://gitlab.com/cryptsetup/cryptsetup

 [[
 #!/bin/sh
--- a/doc/kernel/era.txt
+++ b/doc/kernel/era.txt
@@ -0,0 +1,108 @@
+Introduction
+============
+
+dm-era is a target that behaves similarly to the linear target.  In
+addition it keeps track of which blocks were written within a user
+defined period of time called an 'era'.  Each era target instance
+maintains the current era as a monotonically increasing 32-bit
+counter.
+
+Use cases include tracking changed blocks for backup software, and
+partially invalidating the contents of a cache to restore cache
+coherency after rolling back a vendor snapshot.
+
+Constructor
+===========
+
+ era <metadata dev> <origin dev> <block size>
+
+ metadata dev    : fast device holding the persistent metadata
+ origin dev	 : device holding data blocks that may change
+ block size      : block size of origin data device, granularity that is
+		     tracked by the target
+
+Messages
+========
+
+None of the dm messages take any arguments.
+
+checkpoint
+----------
+
+Possibly move to a new era.  You shouldn't assume the era has
+incremented.  After sending this message, you should check the
+current era via the status line.
+
+take_metadata_snap
+------------------
+
+Create a clone of the metadata, to allow a userland process to read it.
+
+drop_metadata_snap
+------------------
+
+Drop the metadata snapshot.
+
+Status
+======
+
+<metadata block size> <#used metadata blocks>/<#total metadata blocks>
+<current era> <held metadata root | '-'>
+
+metadata block size	 : Fixed block size for each metadata block in
+			     sectors
+#used metadata blocks	 : Number of metadata blocks used
+#total metadata blocks	 : Total number of metadata blocks
+current era		 : The current era
+held metadata root	 : The location, in blocks, of the metadata root
+			     that has been 'held' for userspace read
+			     access. '-' indicates there is no held root
+
+Detailed use case
+=================
+
+The scenario of invalidating a cache when rolling back a vendor
+snapshot was the primary use case when developing this target:
+
+Taking a vendor snapshot
+------------------------
+
+- Send a checkpoint message to the era target
+- Make a note of the current era in its status line
+- Take vendor snapshot (the era and snapshot should be forever
+  associated now).
+
+Rolling back to a vendor snapshot
+---------------------------------
+
+- Cache enters passthrough mode (see: dm-cache's docs in cache.txt)
+- Rollback vendor storage
+- Take metadata snapshot
+- Ascertain which blocks have been written since the snapshot was taken
+  by checking each block's era
+- Invalidate those blocks in the caching software
+- Cache returns to writeback/writethrough mode
+
+Memory usage
+============
+
+The target uses a bitset to record writes in the current era.  It also
+has a spare bitset ready for switching over to a new era.  Other than
+that it uses a few 4k blocks for updating metadata.
+
+   (4 * nr_blocks) bytes + buffers
+
+Resilience
+==========
+
+Metadata is updated on disk before a write to a previously unwritten
+block is performed.  As such dm-era should not be affected by a hard
+crash such as power failure.
+
+Userland tools
+==============
+
+Userland tools are found in the increasingly poorly named
+thin-provisioning-tools project:
+
+    https://github.com/jthornber/thin-provisioning-tools
--- a/doc/kernel/log-writes.txt
+++ b/doc/kernel/log-writes.txt
@@ -0,0 +1,140 @@
+dm-log-writes
+=============
+
+This target takes 2 devices, one to pass all IO to normally, and one to log all
+of the write operations to.  This is intended for file system developers wishing
+to verify the integrity of metadata or data as the file system is written to.
+There is a log_write_entry written for every WRITE request and the target is
+able to take arbitrary data from userspace to insert into the log.  The data
+that is in the WRITE requests is copied into the log to make the replay happen
+exactly as it happened originally.
+
+Log Ordering
+============
+
+We log things in order of completion once we are sure the write is no longer in
+cache.  This means that normal WRITE requests are not actually logged until the
+next REQ_FLUSH request.  This is to make it easier for userspace to replay the
+log in a way that correlates to what is on disk and not what is in cache, to
+make it easier to detect improper waiting/flushing.
+
+This works by attaching all WRITE requests to a list once the write completes.
+Once we see a REQ_FLUSH request we splice this list onto the request and once
+the FLUSH request completes we log all of the WRITEs and then the FLUSH.  Only
+completed WRITEs, at the time the REQ_FLUSH is issued, are added in order to
+simulate the worst case scenario with regard to power failures.  Consider the
+following example (W means write, C means complete):
+
+W1,W2,W3,C3,C2,Wflush,C1,Cflush
+
+The log would show the following
+
+W3,W2,flush,W1....
+
+Again, this is to simulate what is actually on disk; it allows us to detect
+cases where a power failure at a particular point in time would create an
+inconsistent file system.
+
+Any REQ_FUA requests bypass this flushing mechanism and are logged as soon as
+they complete as those requests will obviously bypass the device cache.
+
+Any REQ_DISCARD requests are treated like WRITE requests.  Otherwise we would
+have all the DISCARD requests, and then the WRITE requests and then the FLUSH
+request.  Consider the following example:
+
+WRITE block 1, DISCARD block 1, FLUSH
+
+If we logged DISCARD when it completed, the replay would look like this
+
+DISCARD 1, WRITE 1, FLUSH
+
+which isn't quite what happened and wouldn't be caught during the log replay.
+
+Target interface
+================
+
+i) Constructor
+
+   log-writes <dev_path> <log_dev_path>
+
+   dev_path	: Device that all of the IO will go to normally.
+   log_dev_path : Device where the log entries are written to.
+
+ii) Status
+
+    <#logged entries> <highest allocated sector>
+
+    #logged entries	       : Number of logged entries
+    highest allocated sector   : Highest allocated sector
+
+iii) Messages
+
+    mark <description>
+
+	You can use a dmsetup message to set an arbitrary mark in a log.
+	For example say you want to fsck a file system after every
+	write, but first you need to replay up to the mkfs to make sure
+	we're fsck'ing something reasonable, you would do something like
+	this:
+
+	  mkfs.btrfs -f /dev/mapper/log
+	  dmsetup message log 0 mark mkfs
+	  <run test>
+
+	  This would allow you to replay the log up to the mkfs mark and
+	  then replay from that point on doing the fsck check in the
+	  interval that you want.
+
+	Every log has a mark at the end labeled "dm-log-writes-end".
+
+Userspace component
+===================
+
+There is a userspace tool that will replay the log for you in various ways.
+It can be found here: https://github.com/josefbacik/log-writes
+
+Example usage
+=============
+
+Say you want to test fsync on your file system.  You would do something like
+this:
+
+TABLE="0 $(blockdev --getsz /dev/sdb) log-writes /dev/sdb /dev/sdc"
+dmsetup create log --table "$TABLE"
+mkfs.btrfs -f /dev/mapper/log
+dmsetup message log 0 mark mkfs
+
+mount /dev/mapper/log /mnt/btrfs-test
+<some test that does fsync at the end>
+dmsetup message log 0 mark fsync
+md5sum /mnt/btrfs-test/foo
+umount /mnt/btrfs-test
+
+dmsetup remove log
+replay-log --log /dev/sdc --replay /dev/sdb --end-mark fsync
+mount /dev/sdb /mnt/btrfs-test
+md5sum /mnt/btrfs-test/foo
+<verify md5sum's are correct>
+
+Another option is to do a complicated file system operation and verify the file
+system is consistent during the entire operation.  You could do this with:
+
+TABLE="0 $(blockdev --getsz /dev/sdb) log-writes /dev/sdb /dev/sdc"
+dmsetup create log --table "$TABLE"
+mkfs.btrfs -f /dev/mapper/log
+dmsetup message log 0 mark mkfs
+
+mount /dev/mapper/log /mnt/btrfs-test
+<fsstress to dirty the fs>
+btrfs filesystem balance /mnt/btrfs-test
+umount /mnt/btrfs-test
+dmsetup remove log
+
+replay-log --log /dev/sdc --replay /dev/sdb --end-mark mkfs
+btrfsck /dev/sdb
+replay-log --log /dev/sdc --replay /dev/sdb --start-mark mkfs \
+	--fsck "btrfsck /dev/sdb" --check fua
+
+And that will replay the log until it sees a FUA request, run the fsck command
+and if the fsck passes it will replay to the next FUA, until it is completed or
+the fsck command exits abnormally.
--- a/doc/kernel/raid.txt
+++ b/doc/kernel/raid.txt
@@ -222,3 +222,5 @@ Version History
 1.4.2   Add RAID10 "far" and "offset" algorithm support.
 1.5.0   Add message interface to allow manipulation of the sync_action.
 	New status (STATUSTYPE_INFO) fields: sync_action and mismatch_cnt.
+1.5.1   Add ability to restore transiently failed devices on resume.
+1.5.2   'mismatch_cnt' is zero unless [last_]sync_action is "check".
--- a/doc/kernel/statistics.txt
+++ b/doc/kernel/statistics.txt
@@ -0,0 +1,186 @@
+DM statistics
+=============
+
+Device Mapper supports the collection of I/O statistics on user-defined
+regions of a DM device.	 If no regions are defined no statistics are
+collected so there isn't any performance impact.  Only bio-based DM
+devices are currently supported.
+
+Each user-defined region specifies a starting sector, length and step.
+Individual statistics will be collected for each step-sized area within
+the range specified.
+
+The I/O statistics counters for each step-sized area of a region are
+in the same format as /sys/block/*/stat or /proc/diskstats (see:
+Documentation/iostats.txt).  But two extra counters (12 and 13) are
+provided: total time spent reading and writing in milliseconds.	 All
+these counters may be accessed by sending the @stats_print message to
+the appropriate DM device via dmsetup.
+
+Each region has a corresponding unique identifier, which we call a
+region_id, that is assigned when the region is created.	 The region_id
+must be supplied when querying statistics about the region, deleting the
+region, etc.  Unique region_ids enable multiple userspace programs to
+request and process statistics for the same DM device without stepping
+on each other's data.
+
+The creation of DM statistics will allocate memory via kmalloc or
+fallback to using vmalloc space.  At most, 1/4 of the overall system
+memory may be allocated by DM statistics.  The admin can see how much
+memory is used by reading
+/sys/module/dm_mod/parameters/stats_current_allocated_bytes
+
+Messages
+========
+
+    @stats_create <range> <step> [<program_id> [<aux_data>]]
+
+	Create a new region and return the region_id.
+
+	<range>
+	  "-" - whole device
+	  "<start_sector>+<length>" - a range of <length> 512-byte sectors
+				      starting with <start_sector>.
+
+	<step>
+	  "<area_size>" - the range is subdivided into areas each containing
+			  <area_size> sectors.
+	  "/<number_of_areas>" - the range is subdivided into the specified
+				 number of areas.
+
+	<program_id>
+	  An optional parameter.  A name that uniquely identifies
+	  the userspace owner of the range.  This groups ranges together
+	  so that userspace programs can identify the ranges they
+	  created and ignore those created by others.
+	  The kernel returns this string back in the output of
+	  @stats_list message, but it doesn't use it for anything else.
+
+	<aux_data>
+	  An optional parameter.  A word that provides auxiliary data
+	  that is useful to the client program that created the range.
+	  The kernel returns this string back in the output of
+	  @stats_list message, but it doesn't use this value for anything.
+
+    @stats_delete <region_id>
+
+	Delete the region with the specified id.
+
+	<region_id>
+	  region_id returned from @stats_create
+
+    @stats_clear <region_id>
+
+	Clear all the counters except the in-flight i/o counters.
+
+	<region_id>
+	  region_id returned from @stats_create
+
+    @stats_list [<program_id>]
+
+	List all regions registered with @stats_create.
+
+	<program_id>
+	  An optional parameter.
+	  If this parameter is specified, only matching regions
+	  are returned.
+	  If it is not specified, all regions are returned.
+
+	Output format:
+	  <region_id>: <start_sector>+<length> <step> <program_id> <aux_data>
+
+    @stats_print <region_id> [<starting_line> <number_of_lines>]
+
+	Print counters for each step-sized area of a region.
+
+	<region_id>
+	  region_id returned from @stats_create
+
+	<starting_line>
+	  The index of the starting line in the output.
+	  If omitted, all lines are returned.
+
+	<number_of_lines>
+	  The number of lines to include in the output.
+	  If omitted, all lines are returned.
+
+	Output format for each step-sized area of a region:
+
+	  <start_sector>+<length> counters
+
+	  The first 11 counters have the same meaning as
+	  /sys/block/*/stat or /proc/diskstats.
+
+	  Please refer to Documentation/iostats.txt for details.
+
+	  1. the number of reads completed
+	  2. the number of reads merged
+	  3. the number of sectors read
+	  4. the number of milliseconds spent reading
+	  5. the number of writes completed
+	  6. the number of writes merged
+	  7. the number of sectors written
+	  8. the number of milliseconds spent writing
+	  9. the number of I/Os currently in progress
+	  10. the number of milliseconds spent doing I/Os
+	  11. the weighted number of milliseconds spent doing I/Os
+
+	  Additional counters:
+	  12. the total time spent reading in milliseconds
+	  13. the total time spent writing in milliseconds
+
+    @stats_print_clear <region_id> [<starting_line> <number_of_lines>]
+
+	Atomically print and then clear all the counters except the
+	in-flight i/o counters.	 Useful when the client consuming the
+	statistics does not want to lose any statistics (those updated
+	between printing and clearing).
+
+	<region_id>
+	  region_id returned from @stats_create
+
+	<starting_line>
+	  The index of the starting line in the output.
+	  If omitted, all lines are printed and then cleared.
+
+	<number_of_lines>
+	  The number of lines to process.
+	  If omitted, all lines are printed and then cleared.
+
+    @stats_set_aux <region_id> <aux_data>
+
+	Store auxiliary data aux_data for the specified region.
+
+	<region_id>
+	  region_id returned from @stats_create
+
+	<aux_data>
+	  The string that identifies data which is useful to the client
+	  program that created the range.  The kernel returns this
+	  string back in the output of @stats_list message, but it
+	  doesn't use this value for anything.
+
+Examples
+========
+
+Subdivide the DM device 'vol' into 100 pieces and start collecting
+statistics on them:
+
+  dmsetup message vol 0 @stats_create - /100
+
+Set the auxiliary data string to "foo bar baz" (the escape for each
+space must also be escaped, otherwise the shell will consume them):
+
+  dmsetup message vol 0 @stats_set_aux 0 foo\\ bar\\ baz
+
+List the statistics:
+
+  dmsetup message vol 0 @stats_list
+
+Print the statistics:
+
+  dmsetup message vol 0 @stats_print 0
+
+Delete the statistics:
+
+  dmsetup message vol 0 @stats_delete 0
--- a/doc/kernel/switch.txt
+++ b/doc/kernel/switch.txt
@@ -0,0 +1,138 @@
+dm-switch
+=========
+
+The device-mapper switch target creates a device that supports an
+arbitrary mapping of fixed-size regions of I/O across a fixed set of
+paths.  The path used for any specific region can be switched
+dynamically by sending the target a message.
+
+It maps I/O to underlying block devices efficiently when there is a large
+number of fixed-sized address regions but there is no simple pattern
+that would allow for a compact representation of the mapping such as
+dm-stripe.
+
+Background
+----------
+
+Dell EqualLogic and some other iSCSI storage arrays use a distributed
+frameless architecture.  In this architecture, the storage group
+consists of a number of distinct storage arrays ("members") each having
+independent controllers, disk storage and network adapters.  When a LUN
+is created it is spread across multiple members.  The details of the
+spreading are hidden from initiators connected to this storage system.
+The storage group exposes a single target discovery portal, no matter
+how many members are being used.  When iSCSI sessions are created, each
+session is connected to an eth port on a single member.  Data to a LUN
+can be sent on any iSCSI session, and if the blocks being accessed are
+stored on another member the I/O will be forwarded as required.  This
+forwarding is invisible to the initiator.  The storage layout is also
+dynamic, and the blocks stored on disk may be moved from member to
+member as needed to balance the load.
+
+This architecture simplifies the management and configuration of both
+the storage group and initiators.  In a multipathing configuration, it
+is possible to set up multiple iSCSI sessions to use multiple network
+interfaces on both the host and target to take advantage of the
+increased network bandwidth.  An initiator could use a simple round
+robin algorithm to send I/O across all paths and let the storage array
+members forward it as necessary, but there is a performance advantage to
+sending data directly to the correct member.
+
+A device-mapper table already lets you map different regions of a
+device onto different targets.  However in this architecture the LUN is
+spread with an address region size on the order of 10s of MBs, which
+means the resulting table could have more than a million entries and
+consume far too much memory.
+
+Using this device-mapper switch target we can now build a two-layer
+device hierarchy:
+
+    Upper Tier - Determine which array member the I/O should be sent to.
+    Lower Tier - Load balance amongst paths to a particular member.
+
+The lower tier consists of a single dm multipath device for each member.
+Each of these multipath devices contains the set of paths directly to
+the array member in one priority group, and leverages existing path
+selectors to load balance amongst these paths.  We also build a
+non-preferred priority group containing paths to other array members for
+failover reasons.
+
+The upper tier consists of a single dm-switch device.  This device uses
+a bitmap to look up the location of the I/O and choose the appropriate
+lower tier device to route the I/O.  By using a bitmap we are able to
+use 4 bits for each address range in a 16 member group (which is very
+large for us).  This is a much denser representation than the dm table
+b-tree can achieve.
+
+Construction Parameters
+=======================
+
+    <num_paths> <region_size> <num_optional_args> [<optional_args>...]
+    [<dev_path> <offset>]+
+
+<num_paths>
+    The number of paths across which to distribute the I/O.
+
+<region_size>
+    The number of 512-byte sectors in a region. Each region can be redirected
+    to any of the available paths.
+
+<num_optional_args>
+    The number of optional arguments. Currently, no optional arguments
+    are supported and so this must be zero.
+
+<dev_path>
+    The block device that represents a specific path to the device.
+
+<offset>
+    The offset of the start of data on the specific <dev_path> (in units
+    of 512-byte sectors). This number is added to the sector number when
+    forwarding the request to the specific path. Typically it is zero.
+
+Messages
+========
+
+set_region_mappings <index>:<path_nr> [<index>]:<path_nr> [<index>]:<path_nr>...
+
+Modify the region table by specifying which regions are redirected to
+which paths.
+
+<index>
+    The region number (region size was specified in constructor parameters).
+    If index is omitted, the next region (previous index + 1) is used.
+    Expressed in hexadecimal (WITHOUT any prefix like 0x).
+
+<path_nr>
+    The path number in the range 0 ... (<num_paths> - 1).
+    Expressed in hexadecimal (WITHOUT any prefix like 0x).
+
+R<n>,<m>
+    This parameter allows repetitive patterns to be loaded quickly. <n> and <m>
+    are hexadecimal numbers. The last <n> mappings are repeated in the next <m>
+    slots.
+
+Status
+======
+
+No status line is reported.
+
+Example
+=======
+
+Assume that you have volumes vg1/switch0 vg1/switch1 vg1/switch2 with
+the same size.
+
+Create a switch device with 64kB region size:
+    dmsetup create switch --table "0 `blockdev --getsize /dev/vg1/switch0`
+	switch 3 128 0 /dev/vg1/switch0 0 /dev/vg1/switch1 0 /dev/vg1/switch2 0"
+
+Set mappings for the first 7 entries to point to devices switch0, switch1,
+switch2, switch0, switch1, switch2, switch1:
+    dmsetup message switch 0 set_region_mappings 0:0 :1 :2 :0 :1 :2 :1
+
+Set repetitive mapping. This command:
+    dmsetup message switch 0 set_region_mappings 1000:1 :2 R2,10
+is equivalent to:
+    dmsetup message switch 0 set_region_mappings 1000:1 :2 :1 :2 :1 :2 :1 :2 \
+	:1 :2 :1 :2 :1 :2 :1 :2 :1 :2
+
--- a/doc/kernel/thin-provisioning.txt
+++ b/doc/kernel/thin-provisioning.txt
@@ -99,13 +99,14 @@ Using an existing pool device
 		 $data_block_size $low_water_mark"

 $data_block_size gives the smallest unit of disk space that can be
-allocated at a time expressed in units of 512-byte sectors.  People
-primarily interested in thin provisioning may want to use a value such
-as 1024 (512KB).  People doing lots of snapshotting may want a smaller value
-such as 128 (64KB).  If you are not zeroing newly-allocated data,
-a larger $data_block_size in the region of 256000 (128MB) is suggested.
-$data_block_size must be the same for the lifetime of the
-metadata device.
+allocated at a time expressed in units of 512-byte sectors.
+$data_block_size must be between 128 (64KB) and 2097152 (1GB) and a
+multiple of 128 (64KB).  $data_block_size cannot be changed after the
+thin-pool is created.  People primarily interested in thin provisioning
+may want to use a value such as 1024 (512KB).  People doing lots of
+snapshotting may want a smaller value such as 128 (64KB).  If you are
+not zeroing newly-allocated data, a larger $data_block_size in the
+region of 256000 (128MB) is suggested.

 $low_water_mark is expressed in blocks of size $data_block_size.  If
 free space on the data device drops below this level then a dm event
@@ -115,6 +116,35 @@ Resuming a device with a new table itself triggers an event so the
 userspace daemon can use this to detect a situation where a new table
 already exceeds the threshold.

+A low water mark for the metadata device is maintained in the kernel and
+will trigger a dm event if free space on the metadata device drops below
+it.
+
+Updating on-disk metadata
+-------------------------
+
+On-disk metadata is committed every time a FLUSH or FUA bio is written.
+If no such requests are made then commits will occur every second.  This
+means the thin-provisioning target behaves like a physical disk that has
+a volatile write cache.  If power is lost you may lose some recent
+writes.  The metadata should always be consistent in spite of any crash.
+
+If data space is exhausted the pool will either error or queue IO
+according to the configuration (see: error_if_no_space).  If metadata
+space is exhausted or a metadata operation fails: the pool will error IO
+until the pool is taken offline and repair is performed to 1) fix any
+potential inconsistencies and 2) clear the flag that imposes repair.
+Once the pool's metadata device is repaired it may be resized, which
+will allow the pool to return to normal operation.  Note that if a pool
+is flagged as needing repair, the pool's data and metadata devices
+cannot be resized until repair is performed.  It should also be noted
+that when the pool's metadata space is exhausted the current metadata
+transaction is aborted.  Given that the pool will cache IO whose
+completion may have already been acknowledged to upper IO layers
+(e.g. filesystem) it is strongly suggested that consistency checks
+(e.g. fsck) be performed on those layers when repair of the pool is
+required.
+
 Thin provisioning
 -----------------

@@ -234,6 +264,8 @@ i) Constructor
      read_only: Don't allow any changes to be made to the pool
 		 metadata.

+      error_if_no_space: Error IOs, instead of queueing, if no space.
+
    Data block size must be between 64KB (128 sectors) and 1GB
    (2097152 sectors) inclusive.

@@ -255,10 +287,9 @@ ii) Status
 	should register for the event and then check the target's status.

    held metadata root:
-	The location, in sectors, of the metadata root that has been
+	The location, in blocks, of the metadata root that has been
 	'held' for userspace read access.  '-' indicates there is no
-	held root.  This feature is not yet implemented so '-' is
-	always returned.
+	held root.

    discard_passdown|no_discard_passdown
 	Whether or not discards are actually being passed down to the
@@ -275,6 +306,14 @@ ii) Status
 	contain the string 'Fail'.  The userspace recovery tools
 	should then be used.

+    error_if_no_space|queue_if_no_space
+	If the pool runs out of data or metadata space, the pool will
+	either queue or error the IO destined to the data device.  The
+	default is to queue the IO until more space is added or the
+	'no_space_timeout' expires.  The 'no_space_timeout' dm-thin-pool
+	module parameter can be used to change this timeout -- it
+	defaults to 60 seconds but may be disabled using a value of 0.
+
 iii) Messages

    create_thin <dev id>
@@ -341,9 +380,6 @@ then you'll have no access to blocks mapped beyond the end.  If you
 load a target that is bigger than before, then extra blocks will be
 provisioned as and when needed.

-If you wish to reduce the size of your thin device and potentially
-regain some space then send the 'trim' message to the pool.
-
 ii) Status

     <nr mapped sectors> <highest mapped sector>
--- a/doc/kernel/verity.txt
+++ b/doc/kernel/verity.txt
@@ -11,6 +11,7 @@ Construction Parameters
    <data_block_size> <hash_block_size>
    <num_data_blocks> <hash_start_block>
    <algorithm> <digest> <salt>
+    [<#opt_params> <opt_params>]

 <version>
    This is the type of the on-disk hash format.
@@ -62,6 +63,22 @@ Construction Parameters
 <salt>
    The hexadecimal encoding of the salt value.

+<#opt_params>
+    Number of optional parameters. If there are no optional parameters,
+    the optional parameters section can be skipped or #opt_params can be zero.
+    Otherwise #opt_params is the number of following arguments.
+
+    Example of optional parameters section:
+        1 ignore_corruption
+
+ignore_corruption
+    Log corrupted blocks, but allow read operations to proceed normally.
+
+restart_on_corruption
+    Restart the system when a corrupted block is discovered. This option is
+    not compatible with ignore_corruption and requires user space support to
+    avoid restart loops.
+
 Theory of operation
 ===================

@@ -125,7 +142,7 @@ block boundary) are the hash blocks which are stored a depth at a time

 The full specification of kernel parameters and on-disk metadata format
 is available at the cryptsetup project's wiki page
-  http://code.google.com/p/cryptsetup/wiki/DMVerity
+  https://gitlab.com/cryptsetup/cryptsetup/wikis/DMVerity

 Status
 ======
@@ -142,7 +159,7 @@ Set up a device:

 A command line tool veritysetup is available to compute or verify
 the hash tree or activate the kernel device. This is available from
-the cryptsetup upstream repository http://code.google.com/p/cryptsetup/
+the cryptsetup upstream repository https://gitlab.com/cryptsetup/cryptsetup/
 (as a libcryptsetup extension).

 Create hash on the device:
--- a/doc/lvmetad_design.txt
+++ b/doc/lvmetad_design.txt
@@ -137,6 +137,17 @@ hosts. Overall, this is not hard, but the devil is in the details. I would
 possibly disable lvmetad for clustered volume groups in the first phase and
 only proceed when the local mode is robust and well tested.

+With lvmlockd, lvmetad state is kept up to date by flagging either an
+individual VG as "invalid", or the global state as "invalid".  When either
+the VG or the global state are read, this invalid flag is returned along
+with the data.  The client command can check for this invalid state and
+decide to read the information from disk rather than use the stale cached
+data.  After the latest data is read from disk, the command may choose to
+send it to lvmetad to update the cache.  lvmlockd uses version numbers
+embedded in its VG and global locks to detect when cached data becomes
+invalid, and it then tells lvmetad to set the related invalid flag.
+dct, 2015-06-23
+
 Protocol & co.
 --------------

--- a/doc/lvmpolld_overview.txt
+++ b/doc/lvmpolld_overview.txt
@@ -0,0 +1,81 @@
+LVM poll daemon overview
+========================
+
+(last updated: 2015-05-09)
+
+LVM poll daemon (lvmpolld) is an alternative to the classical lvm2 polling
+mechanisms. The motivation behind lvmpolld was to create a persistent
+system service that would be more durable and transparent. It is suited
+particularly for any systemd-enabled distribution.
+
+Before lvmpolld, any background polling process originating from an lvm2 command
+initiated inside the cgroup of a systemd service could get killed if the main
+process (service) in that cgroup exited. That could lead to premature termination
+of the lvm2 polling process.
+
+Also, without lvmpolld there was no means to detect that a polling process
+monitoring a specific operation was already in progress, in which case it is not
+desirable to start another one for exactly the same task. lvmpolld is able to
+detect such duplicate requests and does not spawn a redundant process.
+
+lvmpolld is primarily targeted at systems with systemd as the init process. On systems
+without systemd there's no need to install lvmpolld because the issue described
+in the second paragraph does not arise. You can still benefit from
+avoiding duplicate polling processes being spawned, but without systemd lvmpolld
+can't easily be run on-demand (activated by a socket maintained by systemd).
+
+lvmpolld implements shutdown on idle and can shut down automatically after being
+idle for a requested time. 60 seconds is the recommended default here. This behaviour
+can be turned off if it is found to be of no use.
+
+Data structures
+---------------
+
+a) Logical Volume (struct lvmpolld_lv)
+
+Each operation is identified by its LV. The internal identifier within lvmpolld
+is the full LV uuid (vg_uuid+lv_uuid), prefixed with LVM_SYSTEM_DIR if set by the client.
+
+Such a full identifier may look like:
+
+  "/etc/lvm/lvm.confWFd2dU67S8Av29IcJCnYzqQirdfElnxzhCdzEh7EJrfCn9R1TIQjIj58weUZDre4"
+
+or without LVM_SYSTEM_DIR being set explicitly:
+
+  "WFd2dU67S8Av29IcJCnYzqQirdfElnxzhCdzEh7EJrfCn9R1TIQjIj58weUZDre4"
+
+
+The LV carries various metadata about the polling operation. The most significant are:
+
+VG name
+LV name
+polling interval (usually --interval passed to lvm2 command or default from lvm2 
+		  configuration)
+operation type (one of: pvmove, convert, merge, thin_merge)
+LVM_SYSTEM_DIR (if set, this is also passed among environment variables of lvpoll
+		command spawned by lvmpolld)
+
+b) LV stores (struct lvmpolld_store)
+
+lvmpolld uses two stores for logical volumes (struct lvmpolld_lv). One store is for polling
+operations in progress. These operations are, as of now: PV move, mirror up-conversion,
+classical snapshot merge, thin snapshot merge.
+
+The second store is used only for pvmove --abort operations in progress. Both
+stores are independent, and identical LVs (pvmove /dev/sda3 and pvmove --abort /dev/sda3)
+can be run concurrently from lvmpolld's point of view (on the lvm2 side the consistency is
+guaranteed by the lvm2 locking mechanism).
+
+Locking order
+-------------
+
+There are two types of locks in lvmpolld. Each store has its own store lock and each LV has
+its own LV lock.
+
+Locking order is:
+1) store lock
+2) LV lock
+
+Each LV has to be inside a store. When the daemon needs to take both locks, it has
+to take the store lock first and the LV lock afterwards (after the
+appropriate store lock of the store where the LV is being kept :))
--- a/include/.symlinks.in
+++ b/include/.symlinks.in
@@ -1,11 +1,15 @@
@top_srcdir@/daemons/clvmd/clvm.h
@top_srcdir@/daemons/dmeventd/libdevmapper-event.h
@top_srcdir@/daemons/lvmetad/lvmetad-client.h
+@top_srcdir@/daemons/lvmpolld/lvmpolld-protocol.h
+@top_srcdir@/daemons/lvmpolld/polling_ops.h
+@top_srcdir@/daemons/lvmlockd/lvmlockd-client.h
@top_srcdir@/liblvm/lvm2app.h
@top_srcdir@/lib/activate/activate.h
@top_srcdir@/lib/activate/targets.h
@top_srcdir@/lib/cache/lvmcache.h
@top_srcdir@/lib/cache/lvmetad.h
+@top_srcdir@/lib/locking/lvmlockd.h
@top_srcdir@/lib/commands/toolcontext.h
@top_srcdir@/lib/config/config.h
@top_srcdir@/lib/config/config_settings.h
@@ -29,6 +33,8 @@
@top_srcdir@/lib/locking/locking.h
@top_srcdir@/lib/log/log.h
@top_srcdir@/lib/log/lvm-logging.h
+@top_srcdir@/lib/lvmpolld/lvmpolld-client.h
+@top_srcdir@/lib/lvmpolld/polldaemon.h
@top_srcdir@/lib/metadata/lv.h
@top_srcdir@/lib/metadata/lv_alloc.h
@top_srcdir@/lib/metadata/metadata.h
@@ -70,3 +76,4 @@
@top_srcdir@/libdm/misc/kdev_t.h
@top_srcdir@/po/pogen.h
@top_srcdir@/tools/lvm2cmd.h
+@top_srcdir@/tools/tool.h
--- a/include/Makefile.in
+++ b/include/Makefile.in
@@ -20,8 +20,12 @@ include $(top_builddir)/make.tmpl

 all: .symlinks_created

-.symlinks_created: .symlinks 
-	find . -maxdepth 1 -type l -exec $(RM) \{\} \;
+LINKS := $(shell find . -maxdepth 1 -type l)
+
+.symlinks_created: .symlinks
+ifneq (,$(firstword $(LINKS)))
+	$(RM) $(LINKS)
+endif
 	for i in `cat $<`; do $(LN_S) $$i ; done
 	touch $@

@@ -31,5 +35,5 @@ device-mapper: all

 cflow: all

-DISTCLEAN_TARGETS += $(shell find . -maxdepth 1 -type l)
-DISTCLEAN_TARGETS += .include_symlinks .symlinks_created .symlinks
+DISTCLEAN_TARGETS += .symlinks
+CLEAN_TARGETS += $(LINKS) .include_symlinks .symlinks_created
--- a/lib/Makefile.in
+++ b/lib/Makefile.in
@@ -82,7 +82,6 @@ SOURCES =\
 	format_text/format-text.c \
 	format_text/import.c \
 	format_text/import_vsn1.c \
-	format_text/tags.c \
 	format_text/text_label.c \
 	freeseg/freeseg.c \
 	label/label.c \
@@ -123,11 +122,6 @@ SOURCES =\
 	uuid/uuid.c \
 	zero/zero.c

-ifeq ("@HAVE_REALTIME@", "yes")
-  SOURCES +=\
-	misc/timestamp.c
-endif
-
 ifeq ("@LVM1@", "internal")
  SOURCES +=\
 	format1/disk-rep.c \
@@ -196,6 +190,16 @@ ifeq ("@BUILD_LVMETAD@", "yes")
 	cache/lvmetad.c
 endif

+ifeq ("@BUILD_LVMPOLLD@", "yes")
+  SOURCES +=\
+	lvmpolld/lvmpolld-client.c
+endif
+
+ifeq ("@BUILD_LVMLOCKD@", "yes")
+  SOURCES +=\
+	locking/lvmlockd.c
+endif
+
 ifeq ("@DMEVENTD@", "yes")
  CLDFLAGS += -L$(top_builddir)/daemons/dmeventd
  LIBS += -ldevmapper-event
--- a/lib/activate/activate.c
+++ b/lib/activate/activate.c
@@ -180,7 +180,7 @@ int lv_passes_auto_activation_filter(struct cmd_context *cmd, struct logical_vol
 {
 	const struct dm_config_node *cn;

-	if (!(cn = find_config_tree_node(cmd, activation_auto_activation_volume_list_CFG, NULL))) {
+	if (!(cn = find_config_tree_array(cmd, activation_auto_activation_volume_list_CFG, NULL))) {
 		log_verbose("activation/auto_activation_volume_list configuration setting "
 			    "not defined: All logical volumes will be auto-activated.");
 		return 1;
@@ -467,7 +467,7 @@ static int _passes_activation_filter(struct cmd_context *cmd,
 {
 	const struct dm_config_node *cn;

-	if (!(cn = find_config_tree_node(cmd, activation_volume_list_CFG, NULL))) {
+	if (!(cn = find_config_tree_array(cmd, activation_volume_list_CFG, NULL))) {
 		log_verbose("activation/volume_list configuration setting "
 			    "not defined: Checking only host tags for %s/%s",
 			    lv->vg->name, lv->name);
@@ -496,7 +496,7 @@ static int _passes_readonly_filter(struct cmd_context *cmd,
 {
 	const struct dm_config_node *cn;

-	if (!(cn = find_config_tree_node(cmd, activation_read_only_volume_list_CFG, NULL)))
+	if (!(cn = find_config_tree_array(cmd, activation_read_only_volume_list_CFG, NULL)))
 		return 0;

 	return _lv_passes_volumes_filter(cmd, lv, cn, activation_read_only_volume_list_CFG);
@@ -648,8 +648,8 @@ static int _lv_info(struct cmd_context *cmd, const struct logical_volume *lv,
 	 * in progress - as only those could lead to opened files
 	 */
 	if (with_open_count) {
-		if (locking_is_clustered())
-			sync_local_dev_names(cmd); /* Wait to have udev in sync */
+		if (locking_is_clustered() && !sync_local_dev_names(cmd)) /* Wait to have udev in sync */
+			return_0;
 		else if (fs_has_non_delete_ops())
 			fs_unlock(); /* For non clustered - wait if there are non-delete ops */
 	}
@@ -1813,7 +1813,9 @@ static int _lv_suspend(struct cmd_context *cmd, const char *lvid_s,
 		goto_out;

 	/* Ignore origin_only unless LV is origin in both old and new metadata */
-	if (!lv_is_thin_volume(ondisk_lv) && !(lv_is_origin(ondisk_lv) && lv_is_origin(incore_lv)))
+	/* or LV is thin or thin pool volume */
+	if (!lv_is_thin_volume(ondisk_lv) && !lv_is_thin_pool(ondisk_lv) &&
+	    !(lv_is_origin(ondisk_lv) && lv_is_origin(incore_lv)))
 		laopts->origin_only = 0;

 	if (test_mode()) {
@@ -1987,7 +1989,6 @@ static int _lv_resume(struct cmd_context *cmd, const char *lvid_s,
 	const struct logical_volume *lv_to_free = NULL;
 	struct lvinfo info;
 	int r = 0;
-	int messages_only = 0;

 	if (!activation())
 		return 1;
@@ -1995,10 +1996,7 @@ static int _lv_resume(struct cmd_context *cmd, const char *lvid_s,
 	if (!lv && !(lv_to_free = lv = lv_from_lvid(cmd, lvid_s, 0)))
 		goto_out;

-	if (lv_is_thin_pool(lv) && laopts->origin_only)
-		messages_only = 1;
-
-	if (!lv_is_origin(lv) && !lv_is_thin_volume(lv))
+	if (!lv_is_origin(lv) && !lv_is_thin_volume(lv) && !lv_is_thin_pool(lv))
 		laopts->origin_only = 0;

 	if (test_mode()) {
@@ -2010,13 +2008,15 @@ static int _lv_resume(struct cmd_context *cmd, const char *lvid_s,

 	log_debug_activation("Resuming LV %s/%s%s%s%s.", lv->vg->name, lv->name,
 			     error_if_not_active ? "" : " if active",
-			     laopts->origin_only ? " without snapshots" : "",
+			     laopts->origin_only ?
+			     (lv_is_thin_pool(lv) ? " pool only" :
+			      lv_is_thin_volume(lv) ? " thin only" : " without snapshots") : "",
 			     laopts->revert ? " (reverting)" : "");

 	if (!lv_info(cmd, lv, laopts->origin_only, &info, 0, 0))
 		goto_out;

-	if (!info.exists || !(info.suspended || messages_only)) {
+	if (!info.exists || !info.suspended) {
 		if (error_if_not_active)
 			goto_out;
 		r = 1;
@@ -2239,7 +2239,7 @@ static int _lv_activate(struct cmd_context *cmd, const char *lvid_s,
 	}

 	/*
-	 * Check if cmirord is running for clustered mirrors.
+	 * Check if cmirrord is running for clustered mirrors.
 	 */
 	if (!laopts->exclusive && vg_is_clustered(lv->vg) &&
 	    lv_is_mirror(lv) && !lv_is_raid(lv) &&
@@ -2272,6 +2272,7 @@ static int _lv_activate(struct cmd_context *cmd, const char *lvid_s,
 	if (info.exists && !info.suspended && info.live_table &&
 	    (info.read_only == read_only_lv(lv, laopts))) {
 		r = 1;
+		log_debug_activation("Volume is already active.");
 		goto out;
 	}

--- a/lib/activate/dev_manager.c
+++ b/lib/activate/dev_manager.c
@@ -60,6 +60,7 @@ struct dev_manager {
 	uint32_t pvmove_mirror_count;
 	int flush_required;
 	int activation;                 /* building activation tree */
+	int suspend;			/* building suspend tree */
 	int skip_external_lv;
 	struct dm_list pending_delete;	/* str_list of dlid(s) with pending delete */
 	unsigned track_pending_delete;
@@ -445,6 +446,78 @@ out:
 	return r;
 }

+static int _device_is_suspended(int major, int minor)	/* DM info query for device major:minor */
+{
+	struct dm_task *dmt;
+	struct dm_info info;
+	int r = 0;	/* result; stays 0 on every failure path below */
+
+	if (!(dmt = dm_task_create(DM_DEVICE_INFO)))
+		return 0;	/* no task was created, so nothing to destroy */
+
+	if (!dm_task_set_major_minor(dmt, major, minor, 1))
+		goto_out;
+
+	if (activation_checks() && !dm_task_enable_checks(dmt))
+		goto_out;
+
+	if (!dm_task_run(dmt) ||
+	    !dm_task_get_info(dmt, &info)) {
+		log_error("Failed to get info for device %d:%d", major, minor);
+		goto out;
+	}
+
+	r = info.exists && info.suspended;	/* non-zero only for an existing, suspended device */
+out:
+	dm_task_destroy(dmt);	/* reached by success and by every failure after task creation */
+	return r;
+}
+
+static int _ignore_suspended_snapshot_component(struct device *dev)	/* 1 if a device referenced by dev's snapshot/snapshot-origin targets is suspended */
+{
+	struct dm_task *dmt;
+	void *next = NULL;
+	char *params = NULL, *target_type = NULL;
+	uint64_t start, length;
+	int major1, minor1, major2, minor2;
+	int r = 0;	/* sticky: once set by a suspended device it is never cleared */
+
+	if (!(dmt = dm_task_create(DM_DEVICE_TABLE)))
+		return_0;
+
+	if (!dm_task_set_major_minor(dmt, MAJOR(dev->dev), MINOR(dev->dev), 1))
+		goto_out;
+
+	if (activation_checks() && !dm_task_enable_checks(dmt))
+		goto_out;
+
+	if (!dm_task_run(dmt)) {
+		log_error("Failed to get state of snapshot or snapshot origin device");
+		goto out;
+	}
+
+	do {	/* walk every target reported for the device's table */
+		next = dm_get_next_target(dmt, next, &start, &length, &target_type, &params);
+		if (target_type && !strcmp(target_type, "snapshot")) {	/* guard NULL type, as device_is_usable() does */
+			if (!params || sscanf(params, "%d:%d %d:%d", &major1, &minor1, &major2, &minor2) != 4) {
+				log_error("Incorrect snapshot table found");
+				goto_out;
+			}
+			r = r || _device_is_suspended(major1, minor1) || _device_is_suspended(major2, minor2);
+		} else if (target_type && !strcmp(target_type, "snapshot-origin")) {
+			if (!params || sscanf(params, "%d:%d", &major1, &minor1) != 2) {	/* missing params is treated as a malformed table */
+				log_error("Incorrect snapshot-origin table found");
+				goto_out;
+			}
+			r = r || _device_is_suspended(major1, minor1);
+		}
+	} while (next);
+
+out:
+	dm_task_destroy(dmt);
+	return r;
+}
+
 /*
 * device_is_usable
 * @dev
@@ -553,15 +626,25 @@ int device_is_usable(struct device *dev, struct dev_usable_check_params check)
 		 * supported anymore and in general using mirrors in a stack
 		 * is disabled by default (with a warning that if enabled,
 		 * it could cause various deadlocks).
-		 * This is former check used, but it's not correct as it
-		 * disables snapshot-origins to be used in a stack in
-		 * general, not just over mirrors!
+		 * Similar situation can happen with RAID devices where
+		 * a RAID device can be snapshotted.
+		 * If one of the RAID legs is down and we're doing
+		 * lvconvert --repair, there's a time period in which
+		 * snapshot components are (besides other devs) suspended.
+		 * See also https://bugzilla.redhat.com/show_bug.cgi?id=1219222
+		 * for an example where this causes problems.
+		 *
+		 * This is a quick check for now, but replace it with a more
+		 * robust and better check that would check the stack
+		 * correctly, not just snapshots but any combination possible
+		 * in a stack - use proper dm tree to check this instead.
 		 */
-		/*if (check.check_suspended && target_type && !strcmp(target_type, "snapshot-origin")) {
-			log_debug_activation("%s: Snapshot-origin device %s not usable.",
-					     dev_name(dev), name);
+		if (check.check_suspended && target_type &&
+		    (!strcmp(target_type, "snapshot") || !strcmp(target_type, "snapshot-origin")) &&
+		    _ignore_suspended_snapshot_component(dev)) {
+			log_debug_activation("%s: %s device %s not usable.", dev_name(dev), target_type, name);
 			goto out;
-		}*/
+		}

 		if (target_type && strcmp(target_type, "error"))
 			only_error_target = 0;
@@ -1835,7 +1918,6 @@ struct pool_cb_data {
 	int skip_zero;  /* to skip zeroed device header (check first 64B) */
 	int exec;       /* which binary to call */
 	int opts;
-	const char *defaults;
 	const char *global;
 };

@@ -1843,7 +1925,6 @@ static int _pool_callback(struct dm_tree_node *node,
 			  dm_node_callback_t type, void *cb_data)
 {
 	int ret, status, fd;
-	char *split;
 	const struct dm_config_node *cn;
 	const struct dm_config_value *cv;
 	const struct pool_cb_data *data = cb_data;
@@ -1858,23 +1939,19 @@ static int _pool_callback(struct dm_tree_node *node,
 	if (!*argv[0])
 		return 1; /* Checking disabled */

-	if ((cn = find_config_tree_node(mlv->vg->cmd, data->opts, NULL))) {
-		for (cv = cn->v; cv && args < 16; cv = cv->next) {
-			if (cv->type != DM_CFG_STRING) {
-				log_error("Invalid string in config file: "
-					  "global/%s_check_options",
-					  data->global);
-				return 0;
-			}
-			argv[++args] = cv->v.str;
-		}
-	} else {
-		/* Use default options (no support for options with spaces) */
-		if (!(split = dm_pool_strdup(data->dm->mem, data->defaults))) {
-			log_error("Failed to duplicate defaults.");
+	if (!(cn = find_config_tree_array(mlv->vg->cmd, data->opts, NULL))) {
+		log_error(INTERNAL_ERROR "Unable to find configuration for pool check options.");
+		return 0;
+	}
+
+	for (cv = cn->v; cv && args < 16; cv = cv->next) {
+		if (cv->type != DM_CFG_STRING) {
+			log_error("Invalid string in config file: "
+				  "global/%s_check_options",
+				  data->global);
 			return 0;
 		}
-		args = dm_split_words(split, 16, 0, (char**) argv + 1);
+		argv[++args] = cv->v.str;
 	}

 	if (args == 16) {
@@ -1965,14 +2042,12 @@ static int _pool_register_callback(struct dev_manager *dm,
 		data->skip_zero = 1;
 		data->exec = global_thin_check_executable_CFG;
 		data->opts = global_thin_check_options_CFG;
-		data->defaults = DEFAULT_THIN_CHECK_OPTIONS;
 		data->global = "thin";
 	} else if (lv_is_cache(lv)) { /* cache pool */
 		data->pool_lv = first_seg(lv)->pool_lv;
 		data->skip_zero = dm->activation;
 		data->exec = global_cache_check_executable_CFG;
 		data->opts = global_cache_check_options_CFG;
-		data->defaults = DEFAULT_CACHE_CHECK_OPTIONS;
 		data->global = "cache";
 	} else {
 		log_error(INTERNAL_ERROR "Registering unsupported pool callback.");
@@ -1984,6 +2059,11 @@ static int _pool_register_callback(struct dev_manager *dm,
 	return 1;
 }

+/* Declaration to resolve suspend tree and message passing for thin-pool */
+static int _add_target_to_dtree(struct dev_manager *dm,
+				struct dm_tree_node *dnode,
+				struct lv_segment *seg,
+				struct lv_activate_opts *laopts);
 /*
 * Add LV and any known dependencies
 */
@@ -2052,15 +2132,43 @@ static int _add_lv_to_dtree(struct dev_manager *dm, struct dm_tree *dtree,
 		 */
 		if (!_add_dev_to_dtree(dm, dtree, lv, lv_layer(lv)))
 			return_0;
+
+		/*
+		 * TODO: change API and move this code
+		 * Could be easier to handle this in _add_dev_to_dtree()
+		 * and base this according to info.exists ?
+		 */
 		if (!dm->activation) {
-			/* Setup callback for non-activation partial tree */
-			/* Activation gets own callback when needed */
-			/* TODO: extend _cached_dm_info() to return dnode */
 			if (!(uuid = build_dm_uuid(dm->mem, lv, lv_layer(lv))))
 				return_0;
-			if ((node = dm_tree_find_node_by_uuid(dtree, uuid)) &&
-			    !_pool_register_callback(dm, node, lv))
-				return_0;
+			if ((node = dm_tree_find_node_by_uuid(dtree, uuid))) {
+				if (origin_only) {
+					struct lv_activate_opts laopts = {
+						.origin_only = 1,
+						.send_messages = 1 /* Node with messages */
+					};
+					/*
+					 * Add some messages, if the right node exists in the table, only
+					 * when building SUSPEND tree for origin-only thin-pool.
+					 *
+					 * TODO: Fix call of '_add_target_to_dtree()' to add message
+					 * to thin-pool node as we already know the pool node exists
+					 * in the table. Any better/cleaner API way ?
+					 *
+					 * Probably some 'new' target method to add messages for any node?
+					 */
+					if (dm->suspend &&
+					    !dm_list_empty(&(first_seg(lv)->thin_messages)) &&
+					    !_add_target_to_dtree(dm, node, first_seg(lv), &laopts))
+						return_0;
+				} else {
+					/* Setup callback for non-activation partial tree */
+					/* Activation gets own callback when needed */
+					/* TODO: extend _cached_dm_info() to return dnode */
+					if (!_pool_register_callback(dm, node, lv))
+						return_0;
+				}
+			}
 		}
 	}

@@ -2158,7 +2266,7 @@ static struct dm_tree *_create_partial_dtree(struct dev_manager *dm, const struc

 	dm_tree_set_optional_uuid_suffixes(dtree, &uuid_suffix_list[0]);

-	if (!_add_lv_to_dtree(dm, dtree, lv, (lv_is_origin(lv) || lv_is_thin_volume(lv)) ? origin_only : 0))
+	if (!_add_lv_to_dtree(dm, dtree, lv, (lv_is_origin(lv) || lv_is_thin_volume(lv) || lv_is_thin_pool(lv)) ? origin_only : 0))
 		goto_bad;

 	return dtree;
@@ -2613,7 +2721,7 @@ static int _add_segment_to_dtree(struct dev_manager *dm,
 		return_0;

 	/* Add pool layer */
-	if (seg->pool_lv &&
+	if (seg->pool_lv && !laopts->origin_only &&
 	    !_add_new_lv_to_dtree(dm, dtree, seg->pool_lv, laopts,
 				  lv_layer(seg->pool_lv)))
 		return_0;
@@ -3042,7 +3150,10 @@ static int _tree_action(struct dev_manager *dm, const struct logical_volume *lv,
 	int r = 0;

 	if (action < DM_ARRAY_SIZE(_action_names))
-		log_debug_activation("Creating %s tree for %s.", _action_names[action], lv->name);
+		log_debug_activation("Creating %s%s tree for %s.",
+				     _action_names[action],
+				     (laopts->origin_only) ? " origin-only" : "",
+				     display_lvname(lv));

 	/* Some LV can be used for top level tree */
 	/* TODO: add more.... */
@@ -3052,6 +3163,7 @@ static int _tree_action(struct dev_manager *dm, const struct logical_volume *lv,
 	}
 	/* Some targets may build bigger tree for activation */
 	dm->activation = ((action == PRELOAD) || (action == ACTIVATE));
+	dm->suspend = (action == SUSPEND_WITH_LOCKFS) || (action == SUSPEND);
 	if (!(dtree = _create_partial_dtree(dm, lv, laopts->origin_only)))
 		return_0;

@@ -3096,7 +3208,9 @@ static int _tree_action(struct dev_manager *dm, const struct logical_volume *lv,
 	case PRELOAD:
 	case ACTIVATE:
 		/* Add all required new devices to tree */
-		if (!_add_new_lv_to_dtree(dm, dtree, lv, laopts, (lv_is_origin(lv) && laopts->origin_only) ? "real" : NULL))
+		if (!_add_new_lv_to_dtree(dm, dtree, lv, laopts,
+					  (lv_is_origin(lv) && laopts->origin_only) ? "real" :
+					  (lv_is_thin_pool(lv) && laopts->origin_only) ? "tpool" : NULL))
 			goto_out;

 		/* Preload any devices required before any suspensions */
@@ -3134,7 +3248,6 @@ out_no_root:
 int dev_manager_activate(struct dev_manager *dm, const struct logical_volume *lv,
 			 struct lv_activate_opts *laopts)
 {
-	laopts->send_messages = 1;
 	if (!_tree_action(dm, lv, laopts, ACTIVATE))
 		return_0;

--- a/lib/cache/lvmcache.c
+++ b/lib/cache/lvmcache.c
@@ -56,6 +56,7 @@ struct lvmcache_vginfo {
 	char _padding[7];
 	struct lvmcache_vginfo *next; /* Another VG with same name? */
 	char *creation_host;
+	char *lock_type;
 	uint32_t mda_checksum;
 	size_t mda_size;
 	size_t vgmetadata_size;
@@ -67,6 +68,7 @@ struct lvmcache_vginfo {
 	unsigned vg_use_count;	/* Counter of vg reusage */
 	unsigned precommitted;	/* Is vgmetadata live or precommitted? */
 	unsigned cached_vg_invalidated;	/* Signal to regenerate cached_vg */
+	unsigned preferred_duplicates; /* preferred duplicate pvs have been set */
 };

 static struct dm_hash_table *_pvid_hash = NULL;
@@ -115,6 +117,47 @@ int lvmcache_init(void)
 	return 1;
 }

+/*
+ * Once PV info has been populated in lvmcache and
+ * lvmcache has chosen preferred duplicate devices,
+ * set this flag so that lvmcache will not try to
+ * compare and choose preferred duplicate devices
+ * again (which may result in different preferred
+ * devices.)  PV info can be populated in lvmcache
+ * multiple times, each time causing lvmcache to
+ * compare the duplicate devices, so we need to
+ * record that the comparison/preferences have
+ * already been done, so the preferences from the
+ * first time through are not changed.
+ *
+ * This is something of a hack to work around the
+ * fact that the code isn't really designed to
+ * handle duplicate PVs, and the fact that lvmetad
+ * has its own way of picking a preferred duplicate
+ * and lvmcache has another way based on having
+ * more information than lvmetad does.
+ *
+ * If we come up with a better overall method to
+ * handle duplicate PVs, then this can probably be
+ * removed.
+ *
+ * FIXME: if we want to make lvmetad work with clvmd,
+ * then this may need to be changed to set
+ * preferred_duplicates back to 0.
+ */
+
+void lvmcache_set_preferred_duplicates(const char *vgid)	/* flag the cached VG found by vgid */
+{
+	struct lvmcache_vginfo *vginfo;
+
+	if (!(vginfo = lvmcache_vginfo_from_vgid(vgid))) {
+		stack;
+		return;	/* no cached vginfo for this vgid: nothing to flag */
+	}
+
+	vginfo->preferred_duplicates = 1;	/* records that preferred duplicate PVs have been set */
+}
+
 void lvmcache_seed_infos_from_lvmetad(struct cmd_context *cmd)
 {
 	if (!lvmetad_active() || _has_scanned)
@@ -287,7 +330,7 @@ void lvmcache_commit_metadata(const char *vgname)

 void lvmcache_drop_metadata(const char *vgname, int drop_precommitted)
 {
-	if (lvmcache_vgname_is_locked(VG_GLOBAL))
+	if (lvmcache_vgname_is_locked(VG_GLOBAL) && !vg_write_lock_held())
 		return;

 	/* For VG_ORPHANS, we need to invalidate all labels on orphan PVs. */
@@ -863,6 +906,37 @@ int lvmcache_vginfo_holders_dec_and_test_for_zero(struct lvmcache_vginfo *vginfo
 }
 // #endif

+int lvmcache_get_vgnameids(struct cmd_context *cmd, int include_internal,
+			   struct dm_list *vgnameids)	/* appends one vgnameid_list entry per cached VG */
+{
+	struct vgnameid_list *vgnl;
+	struct lvmcache_vginfo *vginfo;
+
+	lvmcache_label_scan(cmd, 0);	/* the scan's return value is not checked here */
+
+	dm_list_iterate_items(vginfo, &_vginfos) {
+		if (!include_internal && is_orphan_vg(vginfo->vgname))
+			continue;	/* orphan VGs are listed only when include_internal is set */
+
+		if (!(vgnl = dm_pool_alloc(cmd->mem, sizeof(*vgnl)))) {
+			log_error("vgnameid_list allocation failed.");
+			return 0;
+		}
+
+		vgnl->vgid = dm_pool_strdup(cmd->mem, vginfo->vgid);	/* both strings are copied into cmd->mem */
+		vgnl->vg_name = dm_pool_strdup(cmd->mem, vginfo->vgname);
+
+		if (!vgnl->vgid || !vgnl->vg_name) {
+			log_error("vgnameid_list member allocation failed.");
+			return 0;	/* entries added in earlier iterations remain on vgnameids */
+		}
+
+		dm_list_add(vgnameids, &vgnl->list);
+	}
+
+	return 1;	/* 1 on success; 0 is returned above on any allocation failure */
+}
+
 struct dm_list *lvmcache_get_vgids(struct cmd_context *cmd,
 				   int include_internal)
 {
@@ -1374,7 +1448,7 @@ static int _lvmcache_update_vgname(struct lvmcache_info *info,
 }

 static int _lvmcache_update_vgstatus(struct lvmcache_info *info, uint32_t vgstatus,
-				     const char *creation_host)
+				     const char *creation_host, const char *lock_type)
 {
 	if (!info || !info->vginfo)
 		return 1;
@@ -1387,11 +1461,11 @@ static int _lvmcache_update_vgstatus(struct lvmcache_info *info, uint32_t vgstat
 	info->vginfo->status = vgstatus;

 	if (!creation_host)
-		return 1;
+		goto set_lock_type;

 	if (info->vginfo->creation_host && !strcmp(creation_host,
 						   info->vginfo->creation_host))
-		return 1;
+		goto set_lock_type;

 	if (info->vginfo->creation_host)
 		dm_free(info->vginfo->creation_host);
@@ -1405,6 +1479,24 @@ static int _lvmcache_update_vgstatus(struct lvmcache_info *info, uint32_t vgstat
 	log_debug_cache("lvmcache: %s: VG %s: Set creation host to %s.",
 			dev_name(info->dev), info->vginfo->vgname, creation_host);

+set_lock_type:
+
+	if (!lock_type)
+		goto out;	/* no lock type supplied: keep whatever is cached */
+
+	if (info->vginfo->lock_type && !strcmp(lock_type, info->vginfo->lock_type))
+		goto out;	/* cached value already matches */
+
+	if (info->vginfo->lock_type)
+		dm_free(info->vginfo->lock_type);	/* release the stale copy before replacing it */
+
+	if (!(info->vginfo->lock_type = dm_strdup(lock_type))) {
+		log_error("cache lock_type alloc failed for %s",
+			  lock_type);
+		return 0;
+	}
+
+out:
 	return 1;
 }

@@ -1473,7 +1565,7 @@ int lvmcache_update_vgname_and_id(struct lvmcache_info *info, struct lvmcache_vg
 	if (!_lvmcache_update_vgname(info, vgname, vgid, vgsummary->vgstatus,
 				     vgsummary->creation_host, info->fmt) ||
 	    !_lvmcache_update_vgid(info, info->vginfo, vgid) ||
-	    !_lvmcache_update_vgstatus(info, vgsummary->vgstatus, vgsummary->creation_host) ||
+	    !_lvmcache_update_vgstatus(info, vgsummary->vgstatus, vgsummary->creation_host, vgsummary->lock_type) ||
 	    !_lvmcache_update_vg_mda_info(info, vgsummary->mda_checksum, vgsummary->mda_size))
 		return_0;

@@ -1488,7 +1580,8 @@ int lvmcache_update_vg(struct volume_group *vg, unsigned precommitted)
 	struct lvmcache_vgsummary vgsummary = {
 		.vgname = vg->name,
 		.vgstatus = vg->status,
-		.vgid = vg->id
+		.vgid = vg->id,
+		.lock_type = vg->lock_type
 	};

 	pvid_s[sizeof(pvid_s) - 1] = '\0';
@@ -1529,6 +1622,64 @@ void lvmcache_replace_dev(struct cmd_context *cmd, struct physical_volume *pv,
 	pv->dev = dev;
 }

+/*
+ * We can see multiple different devices with the
+ * same pvid, i.e. duplicates.
+ *
+ * There may be different reasons for seeing two
+ * devices with the same pvid:
+ * - multipath showing two paths to the same thing
+ * - one device copied to another, e.g. with dd,
+ *   also referred to as cloned devices.
+ * - a "subsystem" taking a device and creating
+ *   another device of its own that represents the
+ *   underlying device it is using, e.g. using dm
+ *   to create an identity mapping of a PV.
+ *
+ * Given duplicate devices, we have to choose one
+ * of them to be the "preferred" dev, i.e. the one
+ * that will be referenced in lvmcache, by pv->dev.
+ * We can keep the existing dev, that's currently
+ * used in lvmcache, or we can replace the existing
+ * dev with the new duplicate.
+ *
+ * Regardless of which device is preferred, we need
+ * to print messages explaining which devices were
+ * found so that a user can sort out for themselves
+ * what has happened if the preferred device is not
+ * the one they are interested in.
+ *
+ * If a user wants to use the non-preferred device,
+ * they will need to filter out the device that
+ * lvm is preferring.
+ *
+ * The dev_subsystem calls check if the major number
+ * of the dev is part of a subsystem like DM/MD/DRBD.
+ * A dev that's part of a subsystem is preferred over a
+ * duplicate of that dev that is not part of a
+ * subsystem.
+ *
+ * The has_holders calls check if the device is being
+ * used by another, and prefers one that's being used.
+ *
+ * FIXME: why do we prefer a device without holders
+ * over a device with holders?  We should understand
+ * the reason for that choice.
+ *
+ * FIXME: there may be other reasons to prefer one
+ * device over another:
+ *
+ * . are there other use/open counts we could check
+ *   beyond the holders?
+ *
+ * . check if either is bad/usable and prefer
+ *   the good one?
+ *
+ * . prefer the one with smaller minor number?
+ *   Might avoid disturbing things due to a new
+ *   transient duplicate?
+ */
+
 struct lvmcache_info *lvmcache_add(struct labeller *labeller, const char *pvid,
 				   struct device *dev,
 				   const char *vgname, const char *vgid,
@@ -1544,6 +1695,7 @@ struct lvmcache_info *lvmcache_add(struct labeller *labeller, const char *pvid,
 		.vgstatus = vgstatus,
 	};

+	/* N.B. vgid is not NUL-terminated when called from _text_pv_write */
 	if (vgid)
 		strncpy((char *)&vgsummary.vgid, vgid, sizeof(vgsummary.vgid));

@@ -1575,51 +1727,166 @@ struct lvmcache_info *lvmcache_add(struct labeller *labeller, const char *pvid,
 		lvmcache_del_bas(info);
 	} else {
 		if (existing->dev != dev) {
-			/* Is the existing entry a duplicate pvid e.g. md ? */
-			if (dev_subsystem_part_major(dt, existing->dev) &&
-			    !dev_subsystem_part_major(dt, dev)) {
-				log_very_verbose("Ignoring duplicate PV %s on "
-						 "%s - using %s %s",
-						 pvid, dev_name(dev),
-						 dev_subsystem_name(dt, existing->dev),
-						 dev_name(existing->dev));
-				return NULL;
-			} else if (dm_is_dm_major(MAJOR(existing->dev->dev)) &&
-				   !dm_is_dm_major(MAJOR(dev->dev))) {
-				log_very_verbose("Ignoring duplicate PV %s on "
-						 "%s - using dm %s",
-						 pvid, dev_name(dev),
-						 dev_name(existing->dev));
-				return NULL;
-			} else if (!dev_subsystem_part_major(dt, existing->dev) &&
-				   dev_subsystem_part_major(dt, dev))
-				log_very_verbose("Duplicate PV %s on %s - "
-						 "using %s %s", pvid,
-						 dev_name(existing->dev),
-						 dev_subsystem_name(dt, existing->dev),
-						 dev_name(dev));
-			else if (!dm_is_dm_major(MAJOR(existing->dev->dev)) &&
-				 dm_is_dm_major(MAJOR(dev->dev)))
-				log_very_verbose("Duplicate PV %s on %s - "
-						 "using dm %s", pvid,
-						 dev_name(existing->dev),
-						 dev_name(dev));
-			/* FIXME If both dm, check dependencies */
-			//else if (dm_is_dm_major(MAJOR(existing->dev->dev)) &&
-				 //dm_is_dm_major(MAJOR(dev->dev)))
-				 //
-			else if (!strcmp(pvid_s, existing->dev->pvid)) {
-				log_error("Found duplicate PV %s: using %s not "
-					  "%s", pvid, dev_name(dev),
-					  dev_name(existing->dev));
-				_found_duplicate_pvs = 1;
+			int old_in_subsystem = 0;
+			int new_in_subsystem = 0;
+			int old_is_dm = 0;
+			int new_is_dm = 0;
+			int old_has_holders = 0;
+			int new_has_holders = 0;
+
+			/*
+			 * Here are different devices with the same pvid:
+			 * duplicates.  See comment above.
+			 */
+
+			/*
+			 * This flag tells the process_each_pv code to search
+			 * the devices list for duplicates, so that devices
+			 * can be processed together with their duplicates
+			 * (while processing the VG, rather than reporting
+			 * pv->dev under the VG, and its duplicate outside
+			 * the VG context.)
+			 */
+			_found_duplicate_pvs = 1;
+
+			/*
+			 * The new dev may not have pvid set.
+			 * The process_each_pv code needs to have the pvid
+			 * set in each device to detect that the devices
+			 * are duplicates.
+			 */
+			strncpy(dev->pvid, pvid_s, sizeof(dev->pvid));
+
+			/*
+			 * Now decide if we are going to ignore the new
+			 * device, or replace the existing/old device in
+			 * lvmcache with the new one.
+			 */
+			old_in_subsystem = dev_subsystem_part_major(dt, existing->dev);
+			new_in_subsystem = dev_subsystem_part_major(dt, dev);
+
+			old_is_dm = dm_is_dm_major(MAJOR(existing->dev->dev));
+			new_is_dm = dm_is_dm_major(MAJOR(dev->dev));
+
+			old_has_holders = dm_device_has_holders(MAJOR(existing->dev->dev), MINOR(existing->dev->dev));
+			new_has_holders = dm_device_has_holders(MAJOR(dev->dev), MINOR(dev->dev));
+
+			if (old_has_holders && new_has_holders) {
+				/*
+				 * This is not a selection of old or new, but
+				 * just a warning to be aware of.
+				 */
+				log_warn("WARNING: duplicate PV %s is being used from both devices %s and %s",
+					 pvid_s,
+					 dev_name(existing->dev),
+					 dev_name(dev));
 			}
+
+			if (existing->vginfo->preferred_duplicates) {
+				/*
+				 * The preferred duplicate devs have already
+				 * been chosen during a previous populating of
+				 * lvmcache, so just use the existing preferences.
+				 */
+				log_verbose("Found duplicate PV %s: using existing dev %s",
+					    pvid_s,
+					    dev_name(existing->dev));
+				return NULL;
+			}
+
+			if (old_in_subsystem && !new_in_subsystem) {
+				/* Use old, ignore new. */
+				log_warn("Found duplicate PV %s: using %s not %s",
+					 pvid_s,
+					 dev_name(existing->dev),
+					 dev_name(dev));
+				log_warn("Using duplicate PV %s from subsystem %s, ignoring %s",
+					 dev_name(existing->dev),
+					 dev_subsystem_name(dt, existing->dev),
+					 dev_name(dev));
+				return NULL;
+
+			} else if (!old_in_subsystem && new_in_subsystem) {
+				/* Use new, replace old. */
+				log_warn("Found duplicate PV %s: using %s not %s",
+					 pvid_s,
+					 dev_name(dev),
+					 dev_name(existing->dev));
+				log_warn("Using duplicate PV %s from subsystem %s, replacing %s",
+					 dev_name(dev),
+					 dev_subsystem_name(dt, dev),
+					 dev_name(existing->dev));
+
+			} else if (old_has_holders && !new_has_holders) {
+				/* Use new, replace old. */
+				/* FIXME: why choose the one without holders? */
+				log_warn("Found duplicate PV %s: using %s not %s",
+					 pvid_s,
+					 dev_name(dev),
+					 dev_name(existing->dev));
+				log_warn("Using duplicate PV %s without holders, replacing %s",
+					 dev_name(dev),
+					 dev_name(existing->dev));
+
+			} else if (!old_has_holders && new_has_holders) {
+				/* Use old, ignore new. */
+				log_warn("Found duplicate PV %s: using %s not %s",
+					 pvid_s,
+					 dev_name(existing->dev),
+					 dev_name(dev));
+				log_warn("Using duplicate PV %s without holders, ignoring %s",
+					 dev_name(existing->dev),
+					 dev_name(dev));
+				return NULL;
+
+			} else if (old_is_dm && new_is_dm) {
+				/* Use new, replace old. */
+				/* FIXME: why choose the new instead of the old? */
+				log_warn("Found duplicate PV %s: using %s not %s",
+					 pvid_s,
+					 dev_name(dev),
+					 dev_name(existing->dev));
+				log_warn("Using duplicate PV %s which is last seen, replacing %s",
+					 dev_name(dev),
+					 dev_name(existing->dev));
+
+			} else if (!strcmp(pvid_s, existing->dev->pvid)) {
+				/* No criteria to use for preferring old or new. */
+				/* FIXME: why choose the new instead of the old? */
+				/* FIXME: a transient duplicate would be a reason
+				 * to select the old instead of the new. */
+				log_warn("Found duplicate PV %s: using %s not %s",
+					 pvid_s,
+					 dev_name(dev),
+					 dev_name(existing->dev));
+				log_warn("Using duplicate PV %s which is last seen, replacing %s",
+					 dev_name(dev),
+					 dev_name(existing->dev));
+			}
+		} else {
+			/*
+			 * The new dev is the same as the existing dev.
+			 *
+			 * FIXME: Why can't we just return NULL here if the
+			 * device already exists?  Things don't seem to work
+			 * if we do that for some reason.
+			 */
+			log_verbose("Found same device %s with same pvid %s",
+				    dev_name(existing->dev), pvid_s);
 		}
-		if (strcmp(pvid_s, existing->dev->pvid)) 
-			log_debug_cache("Updating pvid cache to %s (%s) from %s (%s)",
-					pvid_s, dev_name(dev),
-					existing->dev->pvid, dev_name(existing->dev));
-		/* Switch over to new preferred device */
+
+		/*
+		 * This happens when running pvcreate on an existing PV.
+		 */
+		if (strcmp(pvid_s, existing->dev->pvid))  {
+			log_verbose("Replacing dev %s pvid %s with dev %s pvid %s",
+				    dev_name(existing->dev), existing->dev->pvid,
+				    dev_name(dev), pvid_s);
+		}
+
+		/*
+		 * Switch over to new preferred device.
+		 */
 		existing->dev = dev;
 		info = existing;
 		/* Has labeller changed? */
@@ -2069,7 +2336,8 @@ int lvmcache_lookup_mda(struct lvmcache_vgsummary *vgsummary)
 			vgsummary->vgname = vginfo->vgname;
 			vgsummary->creation_host = vginfo->creation_host;
 			vgsummary->vgstatus = vginfo->status;
-			memcpy((char *)&vgsummary->vgid, vginfo->vgid, sizeof(vginfo->vgid));
+			/* vginfo->vgid has 1 extra byte than vgsummary->vgid */
+			memcpy(&vgsummary->vgid, vginfo->vgid, sizeof(vgsummary->vgid));

 			return 1;
 		}
@@ -2077,3 +2345,16 @@ int lvmcache_lookup_mda(struct lvmcache_vgsummary *vgsummary)

 	return 0;
 }
+
+int lvmcache_contains_lock_type_sanlock(struct cmd_context *cmd)
+{
+	struct lvmcache_vginfo *vginfo;
+
+	dm_list_iterate_items(vginfo, &_vginfos) {
+		if (vginfo->lock_type && !strcmp(vginfo->lock_type, "sanlock"))
+			return 1;
+	}
+
+	return 0;
+}
+
--- a/lib/cache/lvmcache.h
+++ b/lib/cache/lvmcache.h
@@ -39,11 +39,23 @@ struct disk_locn;

 struct lvmcache_vginfo;

+/*
+ * vgsummary represents a summary of the VG that is read
+ * without a lock.  The info does not come through vg_read(),
+ * but through reading mdas.  It provides information about
+ * the VG that is needed to lock the VG and then read it fully
+ * with vg_read(), after which the VG summary should be checked
+ * against the full VG metadata to verify it was correct (since
+ * it was read without a lock.)
+ *
+ * Once read, vgsummary information is saved in lvmcache_vginfo.
+ */
 struct lvmcache_vgsummary {
 	const char *vgname;
 	struct id vgid;
 	uint64_t vgstatus;
 	char *creation_host;
+	const char *lock_type;
 	uint32_t mda_checksum;
 	size_t mda_size;
 };
@@ -107,6 +119,9 @@ struct dm_list *lvmcache_get_vgnames(struct cmd_context *cmd,
 struct dm_list *lvmcache_get_vgids(struct cmd_context *cmd,
 				   int include_internal);

+int lvmcache_get_vgnameids(struct cmd_context *cmd, int include_internal,
+                          struct dm_list *vgnameids);
+
 /* Returns list of struct dm_str_list containing pool-allocated copy of pvids */
 struct dm_list *lvmcache_get_pvids(struct cmd_context *cmd, const char *vgname,
 				const char *vgid);
@@ -171,4 +186,8 @@ void lvmcache_replace_dev(struct cmd_context *cmd, struct physical_volume *pv,

 int lvmcache_found_duplicate_pvs(void);

+void lvmcache_set_preferred_duplicates(const char *vgid);
+
+int lvmcache_contains_lock_type_sanlock(struct cmd_context *cmd);
+
 #endif
--- a/lib/cache/lvmetad.c
+++ b/lib/cache/lvmetad.c
@@ -22,6 +22,7 @@
 #include "format-text.h" // TODO for disk_locn, used as a DA representation
 #include "crc.h"
 #include "lvm-signal.h"
+#include "lvmlockd.h"

 #define SCAN_TIMEOUT_SECONDS	80
 #define MAX_RESCANS		10	/* Maximum number of times to scan all PVs and retry if the daemon returns a token mismatch error */
@@ -34,12 +35,13 @@ static char *_lvmetad_token = NULL;
 static const char *_lvmetad_socket = NULL;
 static struct cmd_context *_lvmetad_cmd = NULL;

+static struct volume_group *lvmetad_pvscan_vg(struct cmd_context *cmd, struct volume_group *vg);
+
 void lvmetad_disconnect(void)
 {
 	if (_lvmetad_connected)
 		daemon_close(_lvmetad);
 	_lvmetad_connected = 0;
-	_lvmetad_cmd = NULL;
 }

 void lvmetad_init(struct cmd_context *cmd)
@@ -47,6 +49,10 @@ void lvmetad_init(struct cmd_context *cmd)
 	if (!_lvmetad_use && !access(getenv("LVM_LVMETAD_PIDFILE") ? : LVMETAD_PIDFILE, F_OK))
 		log_warn("WARNING: lvmetad is running but disabled."
 			 " Restart lvmetad before enabling it!");
+
+	if (_lvmetad_connected)
+		log_debug(INTERNAL_ERROR "Refreshing lvmetad global handle while connection with the daemon is active");
+
 	_lvmetad_cmd = cmd;
 }

@@ -142,7 +148,7 @@ static int _lvmetad_pvscan_all_devs(struct cmd_context *cmd, activation_handler
 static daemon_reply _lvmetad_send(const char *id, ...)
 {
 	va_list ap;
-	daemon_reply repl;
+	daemon_reply repl = { 0 };
 	daemon_request req;
 	unsigned num_rescans = 0;
 	unsigned total_usecs_waited = 0;
@@ -152,8 +158,10 @@ static daemon_reply _lvmetad_send(const char *id, ...)
 retry:
 	req = daemon_request_make(id);

-	if (_lvmetad_token)
-		daemon_request_extend(req, "token = %s", _lvmetad_token, NULL);
+	if (_lvmetad_token && !daemon_request_extend(req, "token = %s", _lvmetad_token, NULL)) {
+		repl.error = ENOMEM;
+		return repl;
+	}

 	va_start(ap, id);
 	daemon_request_extend_v(req, ap);
@@ -265,19 +273,21 @@ static int _read_mda(struct lvmcache_info *info,
 	return 0;
 }

-static struct lvmcache_info *_pv_populate_lvmcache(struct cmd_context *cmd,
-						   struct dm_config_node *cn,
-						   struct format_type *fmt, dev_t fallback)
+static int _pv_populate_lvmcache(struct cmd_context *cmd,
+				 struct dm_config_node *cn,
+				 struct format_type *fmt, dev_t fallback)
 {
-	struct device *dev;
+	struct device *dev, *dev_alternate, *dev_alternate_cache = NULL;
+	struct label *label;
 	struct id pvid, vgid;
 	char mda_id[32];
 	char da_id[32];
 	int i = 0;
-	struct dm_config_node *mda = NULL;
-	struct dm_config_node *da = NULL;
+	struct dm_config_node *mda, *da;
+	struct dm_config_node *alt_devices = dm_config_find_node(cn->child, "devices_alternate");
+	struct dm_config_value *alt_device = NULL;
 	uint64_t offset, size;
-	struct lvmcache_info *info;
+	struct lvmcache_info *info, *info_alternate;
 	const char *pvid_txt = dm_config_find_str(cn->child, "id", NULL),
 		   *vgid_txt = dm_config_find_str(cn->child, "vgid", NULL),
 		   *vgname = dm_config_find_str(cn->child, "vgname", NULL),
@@ -291,7 +301,7 @@ static struct lvmcache_info *_pv_populate_lvmcache(struct cmd_context *cmd,

 	if (!fmt) {
 		log_error("PV %s not recognised. Is the device missing?", pvid_txt);
-		return NULL;
+		return 0;
 	}

 	dev = dev_cache_get_by_devt(devt, cmd->filter);
@@ -300,17 +310,17 @@ static struct lvmcache_info *_pv_populate_lvmcache(struct cmd_context *cmd,

 	if (!dev) {
 		log_warn("WARNING: Device for PV %s not found or rejected by a filter.", pvid_txt);
-		return NULL;
+		return 0;
 	}

 	if (!pvid_txt || !id_read_format(&pvid, pvid_txt)) {
 		log_error("Missing or ill-formatted PVID for PV: %s.", pvid_txt);
-		return NULL;
+		return 0;
 	}

 	if (vgid_txt) {
 		if (!id_read_format(&vgid, vgid_txt))
-			return_NULL;
+			return_0;
 	} else
 		strcpy((char*)&vgid, fmt->orphan_vg_name);

@@ -319,7 +329,7 @@ static struct lvmcache_info *_pv_populate_lvmcache(struct cmd_context *cmd,

 	if (!(info = lvmcache_add(fmt->labeller, (const char *)&pvid, dev,
 				  vgname, (const char *)&vgid, 0)))
-		return_NULL;
+		return_0;

 	lvmcache_get_label(info)->sector = label_sector;
 	lvmcache_get_label(info)->dev = dev;
@@ -360,12 +370,59 @@ static struct lvmcache_info *_pv_populate_lvmcache(struct cmd_context *cmd,
 		++i;
 	} while (da);

-	return info;
+	if (alt_devices)
+		alt_device = alt_devices->v;
+
+	while (alt_device) {
+		dev_alternate = dev_cache_get_by_devt(alt_device->v.i, cmd->filter);
+		if (dev_alternate) {
+			if ((info_alternate = lvmcache_add(fmt->labeller, (const char *)&pvid, dev_alternate,
+							   vgname, (const char *)&vgid, 0))) {
+				dev_alternate_cache = dev_alternate;
+				info = info_alternate;
+				lvmcache_get_label(info)->dev = dev_alternate;
+			}
+		} else {
+			log_warn("Duplicate of PV %s dev %s exists on unknown device %"PRId64 ":%" PRId64,
+				 pvid_txt, dev_name(dev), MAJOR(alt_device->v.i), MINOR(alt_device->v.i));
+		}
+		alt_device = alt_device->next;
+	}
+
+	/*
+	 * Update lvmcache with the info about the alternate device by
+	 * reading its label, which should update lvmcache.
+	 */
+	if (dev_alternate_cache) {
+		if (!label_read(dev_alternate_cache, &label, 0)) {
+			log_warn("No PV label found on duplicate device %s.", dev_name(dev_alternate_cache));
+		}
+	}
+
+	lvmcache_set_preferred_duplicates((const char *)&vgid);
+	return 1;
+}
+
+static int _pv_update_struct_pv(struct physical_volume *pv, struct format_instance *fid)
+{
+	struct lvmcache_info *info;
+	if ((info = lvmcache_info_from_pvid((const char *)&pv->id, 0))) {
+		pv->label_sector = lvmcache_get_label(info)->sector;
+		pv->dev = lvmcache_device(info);
+		if (!pv->dev)
+			pv->status |= MISSING_PV;
+		if (!lvmcache_fid_add_mdas_pv(info, fid))
+			return_0;
+                pv->fid = fid;
+	} else
+		pv->status |= MISSING_PV; /* probably missing */
+	return 1;
 }

 struct volume_group *lvmetad_vg_lookup(struct cmd_context *cmd, const char *vgname, const char *vgid)
 {
 	struct volume_group *vg = NULL;
+	struct volume_group *vg2 = NULL;
 	daemon_reply reply;
 	int found;
 	char uuid[64];
@@ -377,7 +434,6 @@ struct volume_group *lvmetad_vg_lookup(struct cmd_context *cmd, const char *vgna
 	struct format_type *fmt;
 	struct dm_config_node *pvcn;
 	struct pv_list *pvl;
-	struct lvmcache_info *info;

 	if (!lvmetad_active())
 		return NULL;
@@ -428,22 +484,38 @@ struct volume_group *lvmetad_vg_lookup(struct cmd_context *cmd, const char *vgna
 			for (pvcn = pvcn->child; pvcn; pvcn = pvcn->sib)
 				_pv_populate_lvmcache(cmd, pvcn, fmt, 0);

+		if ((pvcn = dm_config_find_node(top, "metadata/outdated_pvs")))
+			for (pvcn = pvcn->child; pvcn; pvcn = pvcn->sib)
+				_pv_populate_lvmcache(cmd, pvcn, fmt, 0);
+
 		top->key = name;
-		if (!(vg = import_vg_from_config_tree(reply.cft, fid)))
+		if (!(vg = import_vg_from_lvmetad_config_tree(reply.cft, fid)))
 			goto_out;

+		/*
+		 * locking may have detected a newer vg version and
+		 * invalidated the cached vg.
+		 */
+		if (dm_config_find_node(reply.cft->root, "vg_invalid")) {
+			log_debug_lvmetad("Update invalid lvmetad cache for VG %s", vgname);
+			vg2 = lvmetad_pvscan_vg(cmd, vg);
+			release_vg(vg);
+			vg = vg2;
+			fid = vg->fid;
+		}
+
 		dm_list_iterate_items(pvl, &vg->pvs) {
-			if ((info = lvmcache_info_from_pvid((const char *)&pvl->pv->id, 0))) {
-				pvl->pv->label_sector = lvmcache_get_label(info)->sector;
-				pvl->pv->dev = lvmcache_device(info);
-				if (!pvl->pv->dev)
-					pvl->pv->status |= MISSING_PV;
-				if (!lvmcache_fid_add_mdas_pv(info, fid)) {
-					vg = NULL;
-					goto_out;	/* FIXME error path */
-				}
-			} else
-				pvl->pv->status |= MISSING_PV; /* probably missing */
+			if (!_pv_update_struct_pv(pvl->pv, fid)) {
+				vg = NULL;
+				goto_out;	/* FIXME error path */
+			}
+		}
+
+		dm_list_iterate_items(pvl, &vg->pvs_outdated) {
+			if (!_pv_update_struct_pv(pvl->pv, fid)) {
+				vg = NULL;
+				goto_out;	/* FIXME error path */
+			}
 		}

 		lvmcache_update_vg(vg, 0);
@@ -642,6 +714,56 @@ int lvmetad_pv_list_to_lvmcache(struct cmd_context *cmd)
 	return 1;
 }

+int lvmetad_get_vgnameids(struct cmd_context *cmd, struct dm_list *vgnameids)
+{
+	struct vgnameid_list *vgnl;
+	struct id vgid;
+	const char *vgid_txt;
+	const char *vg_name;
+	daemon_reply reply;
+	struct dm_config_node *cn;
+
+	log_debug_lvmetad("Asking lvmetad for complete list of known VG ids/names");
+	reply = _lvmetad_send("vg_list", NULL);
+	if (!_lvmetad_handle_reply(reply, "list VGs", "", NULL)) {
+		daemon_reply_destroy(reply);
+		return_0;
+	}
+
+	if ((cn = dm_config_find_node(reply.cft->root, "volume_groups"))) {
+		for (cn = cn->child; cn; cn = cn->sib) {
+			vgid_txt = cn->key;
+			if (!id_read_format(&vgid, vgid_txt)) {
+				stack;
+				continue;
+			}
+
+			if (!(vgnl = dm_pool_alloc(cmd->mem, sizeof(*vgnl)))) {
+				log_error("vgnameid_list allocation failed.");
+				return 0;
+			}
+
+			if (!(vg_name = dm_config_find_str(cn->child, "name", NULL))) {
+				log_error("vg_list no name found.");
+				return 0;
+			}
+
+			vgnl->vgid = dm_pool_strdup(cmd->mem, (char *)&vgid);
+			vgnl->vg_name = dm_pool_strdup(cmd->mem, vg_name);
+
+			if (!vgnl->vgid || !vgnl->vg_name) {
+				log_error("vgnameid_list member allocation failed.");
+				return 0;
+			}
+
+			dm_list_add(vgnameids, &vgnl->list);
+		}
+	}
+
+	daemon_reply_destroy(reply);
+	return 1;
+}
+
 int lvmetad_vg_list_to_lvmcache(struct cmd_context *cmd)
 {
 	struct volume_group *tmp;
@@ -830,6 +952,51 @@ int lvmetad_pv_found(const struct id *pvid, struct device *dev, const struct for
 	     daemon_reply_int(reply, "seqno_after", -1) != daemon_reply_int(reply, "seqno_before", -1)))
 		log_warn("WARNING: Inconsistent metadata found for VG %s", vg->name);

+	/*
+	 * pvscan --cache does not perform any lvmlockd locking, and
+	 * pvscan --cache -aay skips autoactivation in lockd VGs.
+	 *
+	 * pvscan --cache populates lvmetad with VG metadata from disk.
+	 * No lvmlockd locking is needed.  It is expected that lockd VG
+	 * metadata that is read by pvscan and populated in lvmetad may
+	 * be immediately stale due to changes to the VG from other hosts
+	 * during or after this pvscan.  This is normal and not a problem.
+	 * When a subsequent lvm command uses the VG, it will lock the VG
+	 * with lvmlockd, read the VG from lvmetad, and update the cached
+	 * copy from disk if necessary.
+	 *
+	 * pvscan --cache -aay does not activate LVs in lockd VGs because
+	 * activation requires locking, and a lock-start operation is needed
+	 * on a lockd VG before any locking can be performed in it.
+	 *
+	 * An equivalent of pvscan --cache -aay for lockd VGs is:
+	 * 1. pvscan --cache
+	 * 2. vgchange --lock-start
+	 * 3. vgchange -aay -S 'locktype=sanlock || locktype=dlm'
+	 *
+	 * [We could eventually add support for autoactivating lockd VGs
+	 * using pvscan by incorporating the lock start step (which can
+	 * take a long time), but there may be a better option than
+	 * continuing to overload pvscan.]
+	 * 
+	 * Stages of starting a lockd VG:
+	 *
+	 * . pvscan --cache populates lockd VGs in lvmetad without locks,
+	 *   and this initial cached copy may quickly become stale.
+	 *
+	 * . vgchange --lock-start VG reads the VG without the VG lock
+	 *   because no locks are available until the locking is started.
+	 *   It only uses the VG name and lock_type from the VG metadata,
+	 *   and then only uses it to start the VG lockspace in lvmlockd.
+	 *
+	 * . Further lvm commands, e.g. activation, can then lock the VG
+	 *   with lvmlockd and use current VG metadata.
+	 */
+	if (handler && vg && is_lockd_type(vg->lock_type)) {
+		log_debug_lvmetad("Skip pvscan activation for lockd type VG %s", vg->name);
+		handler = NULL;
+	}
+
 	if (result && handler) {
 		status = daemon_reply_str(reply, "status", "<missing>");
 		vgname = daemon_reply_str(reply, "vgname", "<missing>");
@@ -897,7 +1064,8 @@ static int _lvmetad_pvscan_single(struct metadata_area *mda, void *baton)
 	struct _lvmetad_pvscan_baton *b = baton;
 	struct volume_group *this;

-	this = mda_is_ignored(mda) ? NULL : mda->ops->vg_read(b->fid, "", mda, NULL, NULL, 1);
+	if (!(this = mda_is_ignored(mda) ? NULL : mda->ops->vg_read(b->fid, "", mda, NULL, NULL, 1)))
+		return 1;

 	/* FIXME Also ensure contents match etc. */
 	if (!b->vg || this->seqno > b->vg->seqno)
@@ -908,6 +1076,100 @@ static int _lvmetad_pvscan_single(struct metadata_area *mda, void *baton)
 	return 1;
 }

+/*
+ * The lock manager may detect that the vg cached in lvmetad is out of date,
+ * due to something like an lvcreate from another host.
+ * This is limited to changes that only affect the vg (not global state like
+ * orphan PVs), so we only need to reread mdas on the vg's existing pvs.
+ */
+
+static struct volume_group *lvmetad_pvscan_vg(struct cmd_context *cmd, struct volume_group *vg)
+{
+	struct volume_group *vg_ret = NULL;
+	struct dm_config_tree *vgmeta_ret = NULL;
+	struct dm_config_tree *vgmeta;
+	struct pv_list *pvl;
+	struct lvmcache_info *info;
+	struct format_instance *fid;
+	struct format_instance_ctx fic = { .type = 0 };
+	struct _lvmetad_pvscan_baton baton;
+
+	dm_list_iterate_items(pvl, &vg->pvs) {
+		/* missing pv */
+		if (!pvl->pv->dev)
+			continue;
+
+		if (!(info = lvmcache_info_from_pvid((const char *)&pvl->pv->id, 0))) {
+			log_error("Failed to find cached info for PV %s.", pv_dev_name(pvl->pv));
+			return NULL;
+		}
+
+		baton.vg = NULL;
+		baton.fid = lvmcache_fmt(info)->ops->create_instance(lvmcache_fmt(info), &fic);
+
+		if (!baton.fid)
+			return NULL;
+
+		if (baton.fid->fmt->features & FMT_OBSOLETE) {
+			log_error("WARNING: Ignoring obsolete format of metadata (%s) on device %s when using lvmetad",
+			  	baton.fid->fmt->name, dev_name(pvl->pv->dev));
+			lvmcache_fmt(info)->ops->destroy_instance(baton.fid);
+			return NULL;
+		}
+
+		lvmcache_foreach_mda(info, _lvmetad_pvscan_single, &baton);
+
+		if (!baton.vg) {
+			lvmcache_fmt(info)->ops->destroy_instance(baton.fid);
+			return NULL;
+		}
+
+		if (!(vgmeta = export_vg_to_config_tree(baton.vg))) {
+			log_error("VG export to config tree failed");
+			release_vg(baton.vg);
+			return NULL;
+		}
+
+		if (!vgmeta_ret) {
+			vgmeta_ret = vgmeta;
+		} else {
+			if (!compare_config(vgmeta_ret->root, vgmeta->root)) {
+				log_error("VG metadata comparison failed");
+				dm_config_destroy(vgmeta);
+				dm_config_destroy(vgmeta_ret);
+				release_vg(baton.vg);
+				return NULL;
+			}
+			dm_config_destroy(vgmeta);
+		}
+
+		release_vg(baton.vg);
+	}
+
+	if (vgmeta_ret) {
+		fid = lvmcache_fmt(info)->ops->create_instance(lvmcache_fmt(info), &fic);
+		if (!(vg_ret = import_vg_from_config_tree(vgmeta_ret, fid))) {
+			log_error("VG import from config tree failed");
+			lvmcache_fmt(info)->ops->destroy_instance(fid);
+			goto out;
+		}
+
+		/*
+		 * Update lvmetad with the newly read version of the VG.
+		 * The "precommitted" name is a misnomer in this case,
+		 * but that is the field which lvmetad_vg_update() uses
+		 * to send the metadata cft to lvmetad.
+		 */
+		vg_ret->cft_precommitted = vgmeta_ret;
+		if (!lvmetad_vg_update(vg_ret))
+			log_error("Failed to update lvmetad with new VG meta");
+		vg_ret->cft_precommitted = NULL;
+		dm_config_destroy(vgmeta_ret);
+	}
+out:
+	return vg_ret;
+}
+
 int lvmetad_pvscan_single(struct cmd_context *cmd, struct device *dev,
 			  activation_handler handler, int ignore_obsolete)
 {
@@ -1057,3 +1319,327 @@ int lvmetad_pvscan_foreign_vgs(struct cmd_context *cmd, activation_handler handl
 {
 	return _lvmetad_pvscan_all_devs(cmd, handler, 1);
 }
+
+int lvmetad_vg_clear_outdated_pvs(struct volume_group *vg)
+{
+	char uuid[64];
+	daemon_reply reply;
+	int result;
+
+	if (!id_write_format(&vg->id, uuid, sizeof(uuid)))
+		return_0;
+
+	reply = _lvmetad_send("vg_clear_outdated_pvs", "vgid = %s", uuid, NULL);
+	result = _lvmetad_handle_reply(reply, "clear the list of outdated PVs", vg->name, NULL);
+	daemon_reply_destroy(reply);
+
+	return result;
+}
+
+/*
+ * Records the state of cached PVs in lvmetad so we can look for changes
+ * after rescanning.
+ */
+struct pv_cache_list {
+	struct dm_list list;
+	dev_t devt;
+	struct id pvid;
+	const char *vgid;
+	unsigned found : 1;
+	unsigned update_udev : 1;
+};
+
+/*
+ * Get the list of PVs known to lvmetad.
+ */
+static int _lvmetad_get_pv_cache_list(struct cmd_context *cmd, struct dm_list *pvc_list)
+{
+	daemon_reply reply;
+	struct dm_config_node *cn;
+	struct pv_cache_list *pvcl;
+	const char *pvid_txt;
+	const char *vgid;
+
+	if (!lvmetad_active())
+		return 1;
+
+	log_debug_lvmetad("Asking lvmetad for complete list of known PVs");
+	reply = _lvmetad_send("pv_list", NULL);
+	if (!_lvmetad_handle_reply(reply, "list PVs", "", NULL)) {
+		log_error("lvmetad message failed.");
+		daemon_reply_destroy(reply);
+		return_0;
+	}
+
+	if ((cn = dm_config_find_node(reply.cft->root, "physical_volumes"))) {
+		for (cn = cn->child; cn; cn = cn->sib) {
+			if (!(pvcl = dm_pool_zalloc(cmd->mem, sizeof(*pvcl)))) {
+				log_error("pv_cache_list allocation failed.");
+				return 0;
+			}
+
+			pvid_txt = cn->key;
+			if (!id_read_format(&pvcl->pvid, pvid_txt)) {
+				stack;
+				continue;
+			}
+
+			pvcl->devt = dm_config_find_int(cn->child, "device", 0);
+
+			if ((vgid = dm_config_find_str(cn->child, "vgid", NULL)))
+				pvcl->vgid = dm_pool_strdup(cmd->mem, vgid);
+
+			dm_list_add(pvc_list, &pvcl->list);
+		}
+	}
+
+	daemon_reply_destroy(reply);
+
+	return 1;
+}
+
+/*
+ * Opening the device RDWR should trigger a udev db update.
+ * FIXME: is there a better way to update the udev db than
+ * doing an open/close of the device? - For example writing
+ * "change" to /sys/block/<device>/uevent?
+ */
+static void _update_pv_in_udev(struct cmd_context *cmd, dev_t devt)
+{
+	struct device *dev;
+
+	log_debug_devs("device %d:%d open to update udev",
+		       (int)MAJOR(devt), (int)MINOR(devt));
+
+	if (!(dev = dev_cache_get_by_devt(devt, cmd->lvmetad_filter))) {
+		log_error("_update_pv_in_udev no dev found");
+		return;
+	}
+
+	if (!dev_open(dev)) {
+		stack;
+		return;
+	}
+
+	if (!dev_close(dev))
+		stack;
+}
+
+/*
+ * Compare before and after PV lists from before/after rescanning,
+ * and update udev db for changes.
+ *
+ * For PVs that have changed pvid or vgid in lvmetad from rescanning,
+ * there may be information in the udev database to update, so open
+ * these devices to trigger a udev update.
+ *
+ * "before" refers to the list of pvs from lvmetad before rescanning
+ * "after" refers to the list of pvs from lvmetad after rescanning
+ *
+ * Comparing both lists, we can see which PVs changed (pvid or vgid),
+ * and trigger a udev db update for those.
+ */
+static void _update_changed_pvs_in_udev(struct cmd_context *cmd,
+					struct dm_list *pvc_before,
+					struct dm_list *pvc_after)
+{
+	struct pv_cache_list *before;
+	struct pv_cache_list *after;
+	char id_before[ID_LEN + 1]  __attribute__((aligned(8)));
+	char id_after[ID_LEN + 1]  __attribute__((aligned(8)));
+	int found;
+
+	dm_list_iterate_items(before, pvc_before) {
+		found = 0;
+
+		dm_list_iterate_items(after, pvc_after) {
+			if (after->found)
+				continue;
+
+			if (before->devt != after->devt)
+				continue;
+
+			if (!id_equal(&before->pvid, &after->pvid)) {
+				memset(id_before, 0, sizeof(id_before));
+				memset(id_after, 0, sizeof(id_after));
+				strncpy(&id_before[0], (char *) &before->pvid, sizeof(id_before) - 1);
+				strncpy(&id_after[0], (char *) &after->pvid, sizeof(id_after) - 1);
+
+				log_debug_devs("device %d:%d changed pvid from %s to %s",
+					       (int)MAJOR(before->devt), (int)MINOR(before->devt),
+					       id_before, id_after);
+
+				before->update_udev = 1;
+
+			} else if ((before->vgid && !after->vgid) ||
+				   (after->vgid && !before->vgid) ||
+				   (before->vgid && after->vgid && strcmp(before->vgid, after->vgid))) {
+
+				log_debug_devs("device %d:%d changed vg from %s to %s",
+					       (int)MAJOR(before->devt), (int)MINOR(before->devt),
+					       before->vgid ?: "none", after->vgid ?: "none");
+
+				before->update_udev = 1;
+			}
+
+			after->found = 1;
+			before->found = 1;
+			found = 1;
+			break;
+		}
+
+		if (!found) {
+			memset(id_before, 0, sizeof(id_before));
+			strncpy(&id_before[0], (char *) &before->pvid, sizeof(id_before) - 1);
+
+			log_debug_devs("device %d:%d pvid %s vg %s is gone",
+				       (int)MAJOR(before->devt), (int)MINOR(before->devt),
+				       id_before, before->vgid ? before->vgid : "none");
+
+			before->update_udev = 1;
+		}
+	}
+
+	dm_list_iterate_items(before, pvc_before) {
+		if (before->update_udev)
+			_update_pv_in_udev(cmd, before->devt);
+	}
+
+	dm_list_iterate_items(after, pvc_after) {
+		if (after->update_udev)
+			_update_pv_in_udev(cmd, after->devt);
+	}
+}
+
+/*
+ * Before this command was run, some external entity may have
+ * invalidated lvmetad's cache of global information, e.g. lvmlockd.
+ *
+ * The global information includes things like a new VG, a
+ * VG that was removed, the assignment of a PV to a VG;
+ * any change that is not isolated within a single VG.
+ *
+ * The external entity, like a lock manager, would invalidate
+ * the lvmetad global cache if it detected that the global
+ * information had been changed on disk by something other
+ * than a local lvm command, e.g. an lvm command on another
+ * host with access to the same devices.  (How it detects
+ * the change is specific to lock manager or other entity.)
+ *
+ * The effect is that metadata on disk is newer than the metadata
+ * in the local lvmetad daemon, and the local lvmetad's cache
+ * should be updated from disk before this command uses it.
+ *
+ * So, using this function, a command checks if lvmetad's global
+ * cache is valid.  If so, it does nothing.  If not, it rescans
+ * devices to update the lvmetad cache, then it notifies lvmetad
+ * that its cache is valid again (consistent with what's on disk.)
+ * This command can then go ahead and use the newly refreshed metadata.
+ *
+ * 1. Check if the lvmetad global cache is invalid.
+ * 2. If so, reread metadata from all devices and update the lvmetad cache.
+ * 3. Tell lvmetad that the global cache is now valid.
+ */
+
+void lvmetad_validate_global_cache(struct cmd_context *cmd, int force)
+{
+	struct dm_list pvc_before; /* pv_cache_list */
+	struct dm_list pvc_after; /* pv_cache_list */
+	daemon_reply reply;
+	int global_invalid;
+
+	dm_list_init(&pvc_before);
+	dm_list_init(&pvc_after);
+
+	if (!lvmlockd_use()) {
+		log_error(INTERNAL_ERROR "validate global cache without lvmlockd");
+		return;
+	}
+
+	if (!lvmetad_used())
+		return;
+
+	log_debug_lvmetad("Validating global lvmetad cache");
+
+	if (force)
+		goto do_scan;
+
+	reply = daemon_send_simple(_lvmetad, "get_global_info",
+				   "token = %s", "skip",
+				   NULL);
+
+	if (reply.error) {
+		log_error("lvmetad_validate_global_cache get_global_info error %d", reply.error);
+		goto do_scan;
+	}
+
+	if (strcmp(daemon_reply_str(reply, "response", ""), "OK")) {
+		log_error("lvmetad_validate_global_cache get_global_info not ok");
+		goto do_scan;
+	}
+
+	global_invalid = daemon_reply_int(reply, "global_invalid", -1);
+
+	daemon_reply_destroy(reply);
+
+	if (!global_invalid) {
+		/* cache is valid */
+		return;
+	}
+
+ do_scan:
+	/*
+	 * Save the current state of pvs from lvmetad so after devices are
+	 * scanned, we can compare to the new state to see if pvs changed.
+	 */
+	_lvmetad_get_pv_cache_list(cmd, &pvc_before);
+
+	/*
+	 * Update the local lvmetad cache so it correctly reflects any
+	 * changes made on remote hosts.
+	 */
+	lvmetad_pvscan_all_devs(cmd, NULL);
+
+	/*
+	 * Clear the global_invalid flag in lvmetad.
+	 * Subsequent local commands that read global state
+	 * from lvmetad will not see global_invalid until
+	 * another host makes another global change.
+	 */
+	reply = daemon_send_simple(_lvmetad, "set_global_info",
+				   "token = %s", "skip",
+				   "global_invalid = %d", 0,
+				   NULL);
+	if (reply.error)
+		log_error("lvmetad_validate_global_cache set_global_info error %d", reply.error);
+
+	if (strcmp(daemon_reply_str(reply, "response", ""), "OK"))
+		log_error("lvmetad_validate_global_cache set_global_info not ok");
+
+	daemon_reply_destroy(reply);
+
+	/*
+	 * Populate this command's lvmcache structures from lvmetad.
+	 */
+	lvmcache_seed_infos_from_lvmetad(cmd);
+
+	/*
+	 * Update the local udev database to reflect PV changes from
+	 * other hosts.
+	 *
+	 * Compare the before and after PV lists, and if a PV's
+	 * pvid or vgid has changed, then open that device to trigger
+	 * a uevent to update the udev db.
+	 *
+	 * This has no direct benefit to lvm, but is just a best effort
+	 * attempt to keep the udev db updated and reflecting current
+	 * lvm information.
+	 *
+	 * FIXME: lvmcache_seed_infos_from_lvmetad() and _lvmetad_get_pv_cache_list()
+	 * each get pv_list from lvmetad, and they could share a single pv_list reply.
+	 */
+	if (!dm_list_empty(&pvc_before)) {
+		_lvmetad_get_pv_cache_list(cmd, &pvc_after);
+		_update_changed_pvs_in_udev(cmd, &pvc_before, &pvc_after);
+	}
+}
--- a/lib/cache/lvmetad.h
+++ b/lib/cache/lvmetad.h
@@ -29,8 +29,7 @@ typedef int (*activation_handler) (struct cmd_context *cmd,

 #ifdef LVMETAD_SUPPORT
 /*
- * Initialise the communication with lvmetad. Normally called by
- * lvmcache_init. Sets up a global handle for our process.
+ * Sets up a global handle for our process.
 */
 void lvmetad_init(struct cmd_context *);

@@ -59,7 +58,9 @@ int lvmetad_socket_present(void);

 /*
 * Check whether lvmetad is active (where active means both that it is running
- * and that we have a working connection with it).
+ * and that we have a working connection with it). As a side effect, it opens
+ * a new connection to lvmetad when lvmetad is supposed to be used and the
+ * connection is not open yet.
 */
 int lvmetad_active(void);

@@ -70,8 +71,9 @@ int lvmetad_active(void);
 void lvmetad_connect_or_warn(void);

 /*
- * Drop connection to lvmetad. A subsequent lvmetad_init() will re-establish
- * the connection (possibly at a different socket path).
+ * Drop connection to lvmetad. A subsequent lvmetad_connect_or_warn or
+ * lvmetad_active will re-establish the connection (possibly at a
+ * different socket path).
 */
 void lvmetad_disconnect(void);

@@ -142,6 +144,12 @@ int lvmetad_pv_lookup_by_dev(struct cmd_context *cmd, struct device *dev, int *f
 */
 int lvmetad_vg_list_to_lvmcache(struct cmd_context *cmd);

+/*
+ * Request a list of vgid/vgname pairs for all VGs known to lvmetad.
+ * Does not do vg_lookup's on each VG, and does not populate lvmcache.
+ */
+int lvmetad_get_vgnameids(struct cmd_context *cmd, struct dm_list *vgnameids);
+
 /*
 * Find a VG by its ID or its name in the lvmetad cache. Gives NULL if the VG is
 * not found.
@@ -158,6 +166,9 @@ int lvmetad_pvscan_single(struct cmd_context *cmd, struct device *dev,
 int lvmetad_pvscan_all_devs(struct cmd_context *cmd, activation_handler handler);
 int lvmetad_pvscan_foreign_vgs(struct cmd_context *cmd, activation_handler handler);

+int lvmetad_vg_clear_outdated_pvs(struct volume_group *vg);
+void lvmetad_validate_global_cache(struct cmd_context *cmd, int force);
+
 #  else		/* LVMETAD_SUPPORT */

 #    define lvmetad_init(cmd)	do { } while (0)
@@ -179,10 +190,13 @@ int lvmetad_pvscan_foreign_vgs(struct cmd_context *cmd, activation_handler handl
 #    define lvmetad_pv_lookup(cmd, pvid, found)	(0)
 #    define lvmetad_pv_lookup_by_dev(cmd, dev, found)	(0)
 #    define lvmetad_vg_list_to_lvmcache(cmd)	(1)
+#    define lvmetad_get_vgnameids(cmd, vgnameids)       do { } while (0)
 #    define lvmetad_vg_lookup(cmd, vgname, vgid)	(NULL)
 #    define lvmetad_pvscan_single(cmd, dev, handler, ignore_obsolete)	(0)
 #    define lvmetad_pvscan_all_devs(cmd, handler)	(0)
 #    define lvmetad_pvscan_foreign_vgs(cmd, handler)	(0)
+#    define lvmetad_vg_clear_outdated_pvs(vg)           (1)
+#    define lvmetad_validate_global_cache(cmd, force)	do { } while (0)

 #  endif	/* LVMETAD_SUPPORT */

--- a/lib/cache_segtype/cache.c
+++ b/lib/cache_segtype/cache.c
@@ -25,6 +25,11 @@
 #include "lv_alloc.h"
 #include "defaults.h"

+static const char _cache_module[] = "cache";
+
+/* TODO: using static field here, maybe should be a part of segment_type */
+static unsigned _feature_mask;
+
 #define SEG_LOG_ERROR(t, p...) \
        log_error(t " segment %s of logical volume %s.", ## p,	\
                  dm_config_parent_name(sn), seg->lv->name), 0;
@@ -66,20 +71,16 @@ static int _cache_pool_text_import(struct lv_segment *seg,
 	if (dm_config_has_node(sn, "cache_mode")) {
 		if (!(str = dm_config_find_str(sn, "cache_mode", NULL)))
 			return SEG_LOG_ERROR("cache_mode must be a string in");
-		if (!set_cache_pool_feature(&seg->feature_flags, str))
+		if (!cache_set_mode(seg, str))
 			return SEG_LOG_ERROR("Unknown cache_mode in");
-	} else
-		/* When missed in metadata, it's an old stuff - use writethrough */
-		seg->feature_flags |= DM_CACHE_FEATURE_WRITETHROUGH;
+	}

 	if (dm_config_has_node(sn, "policy")) {
 		if (!(str = dm_config_find_str(sn, "policy", NULL)))
 			return SEG_LOG_ERROR("policy must be a string in");
 		if (!(seg->policy_name = dm_pool_strdup(mem, str)))
 			return SEG_LOG_ERROR("Failed to duplicate policy in");
-	} else
-		/* Cannot use 'just' default, so pick one */
-		seg->policy_name = DEFAULT_CACHE_POOL_POLICY; /* FIXME make configurable */
+	}

 	/*
 	 * Read in policy args:
@@ -99,6 +100,9 @@ static int _cache_pool_text_import(struct lv_segment *seg,
 	 *   If the policy is not present, default policy is used.
 	 */
 	if ((sn = dm_config_find_node(sn, "policy_settings"))) {
+		if (!seg->policy_name)
+			return SEG_LOG_ERROR("policy_settings must have a policy_name in");
+
 		if (sn->v)
 			return SEG_LOG_ERROR("policy_settings must be a section in");

@@ -127,24 +131,33 @@ static int _cache_pool_text_export(const struct lv_segment *seg,
 {
 	const char *cache_mode;

-	if (!(cache_mode = get_cache_pool_cachemode_name(seg)))
-		return_0;
-
 	outf(f, "data = \"%s\"", seg_lv(seg, 0)->name);
 	outf(f, "metadata = \"%s\"", seg->metadata_lv->name);
 	outf(f, "chunk_size = %" PRIu32, seg->chunk_size);
-	outf(f, "cache_mode = \"%s\"", cache_mode);

-	if (seg->policy_name)
+	/*
+	 * Cache pool used by a cache LV holds data. Not ideal,
+	 * but not worth to break backward compatibility, by shifting
+	 * content to cache segment
+	 */
+	if (cache_mode_is_set(seg)) {
+		if (!(cache_mode = get_cache_mode_name(seg)))
+			return_0;
+		outf(f, "cache_mode = \"%s\"", cache_mode);
+	}
+
+	if (seg->policy_name) {
 		outf(f, "policy = \"%s\"", seg->policy_name);

-	if (seg->policy_settings) {
-		if (strcmp(seg->policy_settings->key, "policy_settings")) {
-			log_error(INTERNAL_ERROR "Incorrect policy_settings tree, %s.",
-				  seg->policy_settings->key);
-			return 0;
+		if (seg->policy_settings) {
+			if (strcmp(seg->policy_settings->key, "policy_settings")) {
+				log_error(INTERNAL_ERROR "Incorrect policy_settings tree, %s.",
+					  seg->policy_settings->key);
+				return 0;
+			}
+			if (seg->policy_settings->child)
+				out_config_node(f, seg->policy_settings);
 		}
-		out_config_node(f, seg->policy_settings);
 	}

 	return 1;
@@ -157,12 +170,29 @@ static void _destroy(struct segment_type *segtype)

 #ifdef DEVMAPPER_SUPPORT
 static int _target_present(struct cmd_context *cmd,
-				const struct lv_segment *seg __attribute__((unused)),
-				unsigned *attributes __attribute__((unused)))
+			   const struct lv_segment *seg __attribute__((unused)),
+			   unsigned *attributes __attribute__((unused)))
 {
-	uint32_t maj, min, patchlevel;
+	/* List of features with their kernel target version */
+	static const struct feature {
+		uint32_t maj;
+		uint32_t min;
+		unsigned cache_feature;
+		const char feature[12];
+		const char module[12]; /* check dm-%s */
+	} _features[] = {
+		{ 1, 3, CACHE_FEATURE_POLICY_MQ, "policy_mq", "cache-mq" },
+		{ 1, 8, CACHE_FEATURE_POLICY_SMQ, "policy_smq", "cache-smq" },
+	};
+	static const char _lvmconf[] = "global/cache_disabled_features";
+	static unsigned _attrs = 0;
 	static int _cache_checked = 0;
 	static int _cache_present = 0;
+	uint32_t maj, min, patchlevel;
+	unsigned i;
+	const struct dm_config_node *cn;
+	const struct dm_config_value *cv;
+	const char *str;

 	if (!_cache_checked) {
 		_cache_present = target_present(cmd, "cache", 1);
@@ -176,11 +206,53 @@ static int _target_present(struct cmd_context *cmd,

 		if ((maj < 1) ||
 		    ((maj == 1) && (min < 3))) {
-			log_error("The cache kernel module is version %u.%u.%u."
-				  "  Version 1.3.0+ is required.",
+			_cache_present = 0;
+			log_error("The cache kernel module is version %u.%u.%u. "
+				  "Version 1.3.0+ is required.",
 				  maj, min, patchlevel);
 			return 0;
 		}
+
+
+		for (i = 0; i < DM_ARRAY_SIZE(_features); ++i) {
+			if (((maj > _features[i].maj) ||
+			     (maj == _features[i].maj && min >= _features[i].min)) &&
+			    (!_features[i].module[0] || module_present(cmd, _features[i].module)))
+				_attrs |= _features[i].cache_feature;
+			else
+				log_very_verbose("Target %s does not support %s.",
+						 _cache_module, _features[i].feature);
+		}
+	}
+
+	if (attributes) {
+		if (!_feature_mask) {
+			/* Support runtime lvm.conf changes, N.B. avoid 32 feature */
+			if ((cn = find_config_tree_array(cmd, global_cache_disabled_features_CFG, NULL))) {
+				for (cv = cn->v; cv; cv = cv->next) {
+					if (cv->type != DM_CFG_STRING) {
+						log_error("Ignoring invalid string in config file %s.",
+							  _lvmconf);
+						continue;
+					}
+					str = cv->v.str;
+					if (!*str)
+						continue;
+					for (i = 0; i < DM_ARRAY_SIZE(_features); ++i)
+						if (strcasecmp(str, _features[i].feature) == 0)
+							_feature_mask |= _features[i].cache_feature;
+				}
+			}
+
+			_feature_mask = ~_feature_mask;
+
+			for (i = 0; i < DM_ARRAY_SIZE(_features); ++i)
+				if ((_attrs & _features[i].cache_feature) &&
+				    !(_feature_mask & _features[i].cache_feature))
+					log_very_verbose("Target %s %s support disabled by %s",
+							 _cache_module, _features[i].feature, _lvmconf);
+		}
+		*attributes = _attrs & _feature_mask;
 	}

 	return _cache_present;
@@ -282,9 +354,16 @@ static int _cache_add_target_line(struct dev_manager *dm,
 				 struct dm_tree_node *node, uint64_t len,
 				 uint32_t *pvmove_mirror_count __attribute__((unused)))
 {
-	struct lv_segment *cache_pool_seg = first_seg(seg->pool_lv);
+	struct lv_segment *cache_pool_seg;
 	char *metadata_uuid, *data_uuid, *origin_uuid;

+	if (!seg->pool_lv || !seg_is_cache(seg)) {
+		log_error(INTERNAL_ERROR "Passed segment is not cache.");
+		return 0;
+	}
+
+	cache_pool_seg = first_seg(seg->pool_lv);
+
 	if (!(metadata_uuid = build_dm_uuid(mem, cache_pool_seg->metadata_lv, NULL)))
 		return_0;

@@ -299,7 +378,9 @@ static int _cache_add_target_line(struct dev_manager *dm,
 					   metadata_uuid,
 					   data_uuid,
 					   origin_uuid,
-					   seg->cleaner_policy ? "cleaner" : cache_pool_seg->policy_name,
+					   seg->cleaner_policy ? "cleaner" :
+						   /* undefined policy name -> likely an old "mq" */
+						   cache_pool_seg->policy_name ? : "mq",
 					   seg->cleaner_policy ? NULL : cache_pool_seg->policy_settings,
 					   cache_pool_seg->chunk_size))
 		return_0;
@@ -361,5 +442,8 @@ int init_cache_segtypes(struct cmd_context *cmd,
 		return_0;
 	log_very_verbose("Initialised segtype: %s", segtype->name);

+	/* Reset mask for recalc */
+	_feature_mask = 0;
+
 	return 1;
 }
--- a/lib/commands/toolcontext.c
+++ b/lib/commands/toolcontext.c
@@ -30,6 +30,7 @@
 #include "lvmcache.h"
 #include "lvmetad.h"
 #include "archiver.h"
+#include "lvmpolld-client.h"

 #ifdef HAVE_LIBDL
 #include "sharedlib.h"
@@ -244,8 +245,10 @@ static int _parse_debug_classes(struct cmd_context *cmd)
 	const struct dm_config_value *cv;
 	int debug_classes = 0;

-	if (!(cn = find_config_tree_node(cmd, log_debug_classes_CFG, NULL)))
-		return DEFAULT_LOGGED_DEBUG_CLASSES;
+	if (!(cn = find_config_tree_array(cmd, log_debug_classes_CFG, NULL))) {
+		log_error(INTERNAL_ERROR "Unable to find configuration for log/debug_classes.");
+		return -1;
+	}

 	for (cv = cn->v; cv; cv = cv->next) {
 		if (cv->type != DM_CFG_STRING) {
@@ -273,6 +276,8 @@ static int _parse_debug_classes(struct cmd_context *cmd)
 			debug_classes |= LOG_CLASS_CACHE;
 		else if (!strcasecmp(cv->v.str, "locking"))
 			debug_classes |= LOG_CLASS_LOCKING;
+		else if (!strcasecmp(cv->v.str, "lvmpolld"))
+			debug_classes |= LOG_CLASS_LVMPOLLD;
 		else
 			log_verbose("Unrecognised value for log/debug_classes: %s", cv->v.str);
 	}
@@ -410,6 +415,57 @@ static int _check_config(struct cmd_context *cmd)
 	return 1;
 }

+/*
+ * Validate report_time_format_CFG: only the conversions listed below and
+ * printable literal text pass.  Returns the string, or NULL after logging.
+ */
+static const char *_set_time_format(struct cmd_context *cmd)
+{
+	/* Compared to strftime, we do not allow "newline" character - the %n in format. */
+	static const char *allowed_format_chars = "aAbBcCdDeFGghHIjklmMpPrRsStTuUVwWxXyYzZ%";
+	static const char *allowed_alternative_format_chars_e = "cCxXyY";
+	static const char *allowed_alternative_format_chars_o = "deHImMSuUVwWy";
+	const char *tf = find_config_tree_str(cmd, report_time_format_CFG, NULL);
+	const char *chars_to_check;
+	const char *p_fmt;
+	size_t i;
+	char c;
+
+	if (!*tf) {
+		log_error("Configured time format is empty string.");
+		goto bad;
+	}
+
+	/* p_fmt++ also steps over each checked conversion, so "%%" is one unit. */
+	for (p_fmt = tf; (c = *p_fmt); p_fmt++) {
+		if (c != '%') {
+			/* Cast: a negative char passed to isprint() is undefined. */
+			if (isprint((unsigned char) c))
+				continue;
+			log_error("Configured time format contains non-printable characters.");
+			goto bad;
+		}
+		/* An optional E or O modifier selects a narrower conversion set. */
+		chars_to_check = allowed_format_chars;
+		if ((c = *++p_fmt) == 'E') {
+			chars_to_check = allowed_alternative_format_chars_e;
+			c = *++p_fmt;
+		} else if (c == 'O') {
+			chars_to_check = allowed_alternative_format_chars_o;
+			c = *++p_fmt;
+		}
+		for (i = 0; chars_to_check[i] && c != chars_to_check[i]; i++)
+			; /* a NUL c (format ended mid-conversion) matches nothing */
+		if (!chars_to_check[i])
+			goto_bad;
+	}
+
+	return tf;
+bad:
+	log_error("Invalid time format \"%s\" supplied.", tf);
+	return NULL;
+}
+
 int process_profilable_config(struct cmd_context *cmd)
 {
 	if (!(cmd->default_settings.unit_factor =
@@ -423,6 +479,8 @@ int process_profilable_config(struct cmd_context *cmd)
 	cmd->report_binary_values_as_numeric = find_config_tree_bool(cmd, report_binary_values_as_numeric_CFG, NULL);
 	cmd->default_settings.suffix = find_config_tree_bool(cmd, global_suffix_CFG, NULL);
 	cmd->report_list_item_separator = find_config_tree_str(cmd, report_list_item_separator_CFG, NULL);
+	if (!(cmd->time_format = _set_time_format(cmd)))
+		return 0;

 	return 1;
 }
@@ -474,7 +532,6 @@ static int _process_config(struct cmd_context *cmd)
 	const struct dm_config_node *cn;
 	const struct dm_config_value *cv;
 	int64_t pv_min_kb;
-	const char *lvmetad_socket;
 	int udev_disabled = 0;
 	char sysfs_dir[PATH_MAX];

@@ -595,7 +652,7 @@ static int _process_config(struct cmd_context *cmd)
 		}
 	}

-	if ((cn = find_config_tree_node(cmd, activation_mlock_filter_CFG, NULL)))
+	if ((cn = find_config_tree_array(cmd, activation_mlock_filter_CFG, NULL)))
 		for (cv = cn->v; cv; cv = cv->next) 
 			if ((cv->type != DM_CFG_STRING) || !cv->v.str[0]) 
 				log_error("Ignoring invalid activation/mlock_filter entry in config file");
@@ -617,30 +674,6 @@ static int _process_config(struct cmd_context *cmd)
 	init_detect_internal_vg_cache_corruption
 		(find_config_tree_bool(cmd, global_detect_internal_vg_cache_corruption_CFG, NULL));

-	lvmetad_disconnect();
-
-	lvmetad_socket = getenv("LVM_LVMETAD_SOCKET");
-	if (!lvmetad_socket)
-		lvmetad_socket = DEFAULT_RUN_DIR "/lvmetad.socket";
-
-	/* TODO?
-		lvmetad_socket = find_config_tree_str(cmd, "lvmetad/socket_path",
-						      DEFAULT_RUN_DIR "/lvmetad.socket");
-	*/
-	lvmetad_set_socket(lvmetad_socket);
-	cn = find_config_tree_node(cmd, devices_global_filter_CFG, NULL);
-	lvmetad_set_token(cn ? cn->v : NULL);
-
-	if (find_config_tree_int(cmd, global_locking_type_CFG, NULL) == 3 &&
-	    find_config_tree_bool(cmd, global_use_lvmetad_CFG, NULL)) {
-		log_warn("WARNING: configuration setting use_lvmetad overridden to 0 due to locking_type 3. "
-			 "Clustered environment not supported by lvmetad yet.");
-		lvmetad_set_active(NULL, 0);
-	} else
-		lvmetad_set_active(NULL, find_config_tree_bool(cmd, global_use_lvmetad_CFG, NULL));
-
-	lvmetad_init(cmd);
-
 	if (!_init_system_id(cmd))
 		return_0;

@@ -950,15 +983,9 @@ static int _init_dev_cache(struct cmd_context *cmd)

 	init_obtain_device_list_from_udev(device_list_from_udev);

-	if (!(cn = find_config_tree_node(cmd, devices_scan_CFG, NULL))) {
-		if (!dev_cache_add_dir("/dev")) {
-			log_error("Failed to add /dev to internal "
-				  "device cache");
-			return 0;
-		}
-		log_verbose("device/scan not in config file: "
-			    "Defaulting to /dev");
-		return 1;
+	if (!(cn = find_config_tree_array(cmd, devices_scan_CFG, NULL))) {
+		log_error(INTERNAL_ERROR "Unable to find configuration for devices/scan.");
+		return_0;
 	}

 	for (cv = cn->v; cv; cv = cv->next) {
@@ -996,7 +1023,7 @@ static int _init_dev_cache(struct cmd_context *cmd)
 		}
 	}

-	if (!(cn = find_config_tree_node(cmd, devices_loopfiles_CFG, NULL)))
+	if (!(cn = find_config_tree_array(cmd, devices_loopfiles_CFG, NULL)))
 		return 1;

 	for (cv = cn->v; cv; cv = cv->next) {
@@ -1134,7 +1161,7 @@ bad:
 *     md component filter -> fw raid filter
 *
 */
-static int _init_filters(struct cmd_context *cmd, unsigned load_persistent_cache)
+int init_filters(struct cmd_context *cmd, unsigned load_persistent_cache)
 {
 	const char *dev_cache;
 	struct dev_filter *filter = NULL, *filter_components[2] = {0};
@@ -1142,6 +1169,11 @@ static int _init_filters(struct cmd_context *cmd, unsigned load_persistent_cache
 	const struct dm_config_node *cn;
 	struct timespec ts, cts;

+	if (!cmd->initialized.connections) {
+		log_error(INTERNAL_ERROR "connections must be initialized before filters");
+		return 0;
+	}
+
 	cmd->dump_filter = 0;

 	cmd->lvmetad_filter = _init_lvmetad_filter_chain(cmd);
@@ -1171,7 +1203,7 @@ static int _init_filters(struct cmd_context *cmd, unsigned load_persistent_cache
 	}

 	/* filter component 1 */
-	if ((cn = find_config_tree_node(cmd, devices_filter_CFG, NULL))) {
+	if ((cn = find_config_tree_array(cmd, devices_filter_CFG, NULL))) {
 		if (!(filter_components[1] = regex_filter_create(cn->v)))
 			goto_bad;
 		/* we have two filter components - create composite filter */
@@ -1222,6 +1254,7 @@ static int _init_filters(struct cmd_context *cmd, unsigned load_persistent_cache
 				    dev_cache);
 	}

+	cmd->initialized.filters = 1;
 	return 1;
 bad:
 	if (!filter) {
@@ -1245,6 +1278,7 @@ bad:
 	if (cmd->lvmetad_filter)
 		cmd->lvmetad_filter->destroy(cmd->lvmetad_filter);

+	cmd->initialized.filters = 0;
 	return 0;
 }

@@ -1288,7 +1322,7 @@ static int _init_formats(struct cmd_context *cmd)
 #ifdef HAVE_LIBDL
 	/* Load any formats in shared libs if not static */
 	if (!is_static() &&
-	    (cn = find_config_tree_node(cmd, global_format_libraries_CFG, NULL))) {
+	    (cn = find_config_tree_array(cmd, global_format_libraries_CFG, NULL))) {

 		const struct dm_config_value *cv;
 		struct format_type *(*init_format_fn) (struct cmd_context *);
@@ -1454,7 +1488,7 @@ static int _init_segtypes(struct cmd_context *cmd)
 #ifdef HAVE_LIBDL
 	/* Load any formats in shared libs unless static */
 	if (!is_static() &&
-	    (cn = find_config_tree_node(cmd, global_segment_libraries_CFG, NULL))) {
+	    (cn = find_config_tree_array(cmd, global_segment_libraries_CFG, NULL))) {

 		const struct dm_config_value *cv;
 		int (*init_multiple_segtypes_fn) (struct cmd_context *,
@@ -1618,11 +1652,80 @@ static int _reopen_stream(FILE *stream, int fd, const char *mode, const char *na
 	return 1;
 }

+/*
+ * Drop any lvmetad connection, then set the socket path, the token (from the
+ * devices_global_filter_CFG array) and the active flag.  Always returns 1.
+ */
+static int _init_lvmetad(struct cmd_context *cmd)
+{
+	const struct dm_config_node *cn;
+	const char *lvmetad_socket;
+
+	lvmetad_disconnect();
+
+	lvmetad_socket = getenv("LVM_LVMETAD_SOCKET");
+	if (!lvmetad_socket)
+		lvmetad_socket = DEFAULT_RUN_DIR "/lvmetad.socket";
+
+	/* TODO? Read the path from an "lvmetad/socket_path" config setting instead. */
+	lvmetad_set_socket(lvmetad_socket);
+	cn = find_config_tree_array(cmd, devices_global_filter_CFG, NULL);
+	lvmetad_set_token(cn ? cn->v : NULL);
+
+	if (find_config_tree_int(cmd, global_locking_type_CFG, NULL) == 3 &&
+	    find_config_tree_bool(cmd, global_use_lvmetad_CFG, NULL)) {
+		log_warn("WARNING: configuration setting use_lvmetad overridden to 0 due to locking_type 3. "
+			 "Clustered environment not supported by lvmetad yet.");
+		lvmetad_set_active(NULL, 0);
+	} else
+		lvmetad_set_active(NULL, find_config_tree_bool(cmd, global_use_lvmetad_CFG, NULL));
+
+	lvmetad_init(cmd);
+	return 1;
+}
+
+/* Call lvmpolld_disconnect(), then set the socket path and the active flag. */
+static int _init_lvmpolld(struct cmd_context *cmd)
+{
+	const char *socket_path;
+
+	lvmpolld_disconnect();
+
+	/* An explicit LVM_LVMPOLLD_SOCKET overrides the built-in default path. */
+	if (!(socket_path = getenv("LVM_LVMPOLLD_SOCKET")))
+		socket_path = DEFAULT_RUN_DIR "/lvmpolld.socket";
+	lvmpolld_set_socket(socket_path);
+	lvmpolld_set_active(find_config_tree_bool(cmd, global_use_lvmpolld_CFG, NULL));
+	return 1;
+}
+
+/*
+ * Configure the lvmetad and then the lvmpolld client, stopping at the first
+ * failure, and record the outcome in cmd->initialized.connections.
+ * Returns 1 if both were set up, otherwise 0 after logging an error.
+ */
+int init_connections(struct cmd_context *cmd)
+{
+	int ok = 0;
+
+	if (!_init_lvmetad(cmd))
+		log_error("Failed to initialize lvmetad connection.");
+	else if (!_init_lvmpolld(cmd))
+		log_error("Failed to initialize lvmpolld connection.");
+	else
+		ok = 1;
+
+	cmd->initialized.connections = ok;
+	return ok;
+}
+
 /* Entry point */
 struct cmd_context *create_toolcontext(unsigned is_long_lived,
 				       const char *system_dir,
 				       unsigned set_buffering,
-				       unsigned threaded)
+				       unsigned threaded,
+				       unsigned set_connections,
+				       unsigned set_filters)
 {
 	struct cmd_context *cmd;
 	FILE *new_stream;
@@ -1760,15 +1863,12 @@ struct cmd_context *create_toolcontext(unsigned is_long_lived,
 		goto_out;

 	if (!(cmd->dev_types = create_dev_types(cmd->proc_dir,
-						find_config_tree_node(cmd, devices_types_CFG, NULL))))
+						find_config_tree_array(cmd, devices_types_CFG, NULL))))
 		goto_out;

 	if (!_init_dev_cache(cmd))
 		goto_out;

-	if (!_init_filters(cmd, 1))
-		goto_out;
-
 	memlock_init(cmd);

 	if (!_init_formats(cmd))
@@ -1787,12 +1887,18 @@ struct cmd_context *create_toolcontext(unsigned is_long_lived,

 	_init_globals(cmd);

+	if (set_connections && !init_connections(cmd))
+		goto_out; /* not return_0: out: must destroy the half-built cmd */
+
+	if (set_filters && !init_filters(cmd, 1))
+		goto_out;
+
 	cmd->default_settings.cache_vgmetadata = 1;
 	cmd->current_settings = cmd->default_settings;

-	cmd->config_initialized = 1;
+	cmd->initialized.config = 1;
 out:
-	if (!cmd->config_initialized) {
+	if (!cmd->initialized.config) {
 		destroy_toolcontext(cmd);
 		cmd = NULL;
 	}
@@ -1864,14 +1970,19 @@ static void _destroy_filters(struct cmd_context *cmd)
 		cmd->full_filter->destroy(cmd->full_filter);
 		cmd->lvmetad_filter = cmd->filter = cmd->full_filter = NULL;
 	}
+	cmd->initialized.filters = 0;
 }

 int refresh_filters(struct cmd_context *cmd)
 {
 	int r, saved_ignore_suspended_devices = ignore_suspended_devices();

+	if (!cmd->initialized.filters)
+		/* if filters not initialized, there's nothing to refresh */
+		return 1;
+
 	_destroy_filters(cmd);
-	if (!(r = _init_filters(cmd, 0)))
+	if (!(r = init_filters(cmd, 0)))
                stack;

 	/*
@@ -1900,7 +2011,6 @@ int refresh_toolcontext(struct cmd_context *cmd)
 	label_exit();
 	_destroy_segtypes(&cmd->segtypes);
 	_destroy_formats(cmd, &cmd->formats);
-	_destroy_filters(cmd);

 	if (!dev_cache_exit())
 		stack;
@@ -1918,7 +2028,7 @@ int refresh_toolcontext(struct cmd_context *cmd)

 	_destroy_config(cmd);

-	cmd->config_initialized = 0;
+	cmd->initialized.config = 0;

 	cmd->hosttags = 0;

@@ -1975,15 +2085,12 @@ int refresh_toolcontext(struct cmd_context *cmd)
 		return_0;

 	if (!(cmd->dev_types = create_dev_types(cmd->proc_dir,
-						find_config_tree_node(cmd, devices_types_CFG, NULL))))
+						find_config_tree_array(cmd, devices_types_CFG, NULL))))
 		return_0;

 	if (!_init_dev_cache(cmd))
 		return_0;

-	if (!_init_filters(cmd, 0))
-		return_0;
-
 	if (!_init_formats(cmd))
 		return_0;

@@ -1996,7 +2103,13 @@ int refresh_toolcontext(struct cmd_context *cmd)
 	if (!_init_backup(cmd))
 		return_0;

-	cmd->config_initialized = 1;
+	cmd->initialized.config = 1;
+
+	if (cmd->initialized.connections && !init_connections(cmd))
+		return_0;
+
+	if (!refresh_filters(cmd))
+		return_0;

 	reset_lvm_errno(1);
 	return 1;
@@ -2065,6 +2178,7 @@ void destroy_toolcontext(struct cmd_context *cmd)

 	lvmetad_release_token();
 	lvmetad_disconnect();
+	lvmpolld_disconnect();

 	release_log_memory();
 	activation_exit();
--- a/lib/commands/toolcontext.h
+++ b/lib/commands/toolcontext.h
@@ -60,29 +60,59 @@ struct config_tree_list {
 	struct dm_config_tree *cft;
 };

+struct cmd_context_initialized_parts {
+	unsigned config:1; /* used to reinitialize config if previous init was not successful */
+	unsigned filters:1; /* set by init_filters() on success; cleared on its failure and by _destroy_filters() */
+	unsigned connections:1; /* set by init_connections() on success, cleared on its failure */
+};
+
 /* FIXME Split into tool & library contexts */
 /* command-instance-related variables needed by library */
 struct cmd_context {
-	struct dm_pool *libmem;	/* For permanent config data */
-	struct dm_pool *mem;	/* Transient: Cleared between each command */
+	/*
+	 * Memory handlers.
+	 */
+	struct dm_pool *libmem;			/* for permanent config data */
+	struct dm_pool *mem;			/* transient: cleared between each command */

-	const struct format_type *fmt;	/* Current format to use by default */
-	struct format_type *fmt_backup;	/* Format to use for backups */
-
-	struct dm_list formats;	/* Available formats */
-	struct dm_list segtypes;	/* Available segment types */
-	const char *system_id;
-	const char *hostname;
-	const char *kernel_vsn;
-
-	unsigned rand_seed;
-	char *linebuffer;
+	/*
+	 * Command line and arguments.
+	 */
 	const char *cmd_line;
 	struct command *command;
 	char **argv;
 	struct arg_values *arg_values;
 	struct dm_list arg_value_groups;
-	unsigned is_long_lived:1;	/* Optimises persistent_filter handling */
+
+	/*
+	 * Format handlers.
+	 */
+	const struct format_type *fmt;		/* current format to use by default */
+	struct format_type *fmt_backup;		/* format to use for backups */
+	struct dm_list formats;			/* available formats */
+	struct dm_list segtypes;		/* available segment types */
+
+	/*
+	 * Machine and system identification.
+	 */
+	const char *system_id;
+	const char *hostname;
+	const char *kernel_vsn;
+
+	/*
+	 * Device identification.
+	 */
+	struct dev_types *dev_types;		/* recognized extra device types. */
+
+	/*
+	 * Initialization state.
+	 */
+	struct cmd_context_initialized_parts initialized;
+
+	/*
+	 * Switches.
+	 */
+	unsigned is_long_lived:1;		/* optimises persistent_filter handling */
 	unsigned handles_missing_pvs:1;
 	unsigned handles_unknown_segments:1;
 	unsigned use_linear_target:1;
@@ -93,59 +123,72 @@ struct cmd_context {
 	unsigned report_binary_values_as_numeric:1;
 	unsigned metadata_read_only:1;
 	unsigned ignore_clustered_vgs:1;
-	unsigned threaded:1;		/* Set if running within a thread e.g. clvmd */
-
-	unsigned independent_metadata_areas:1;	/* Active formats have MDAs outside PVs */
+	unsigned threaded:1;			/* set if running within a thread e.g. clvmd */
+	unsigned independent_metadata_areas:1;	/* active formats have MDAs outside PVs */
 	unsigned unknown_system_id:1;
-	unsigned include_foreign_vgs:1;
-	unsigned include_active_foreign_vgs:1;
-	unsigned error_foreign_vgs:1;
-
-	struct dev_types *dev_types;
+	unsigned include_foreign_vgs:1;		/* report/display cmds can reveal foreign VGs */
+	unsigned include_shared_vgs:1;		/* report/display cmds can reveal lockd VGs */
+	unsigned include_active_foreign_vgs:1;	/* cmd should process foreign VGs with active LVs */
+	unsigned vg_read_print_access_error:1;	/* print access errors from vg_read */
+	unsigned lockd_gl_disable:1;
+	unsigned lockd_vg_disable:1;
+	unsigned lockd_lv_disable:1;
+	unsigned lockd_gl_removed:1;
+	unsigned lockd_vg_default_sh:1;
+	unsigned lockd_vg_enforce_sh:1;

 	/*
-	 * Use of filters depends on whether lvmetad is used or not:
-	 *
-	 *   - if lvmetad is used:
-	 *   	- cmd->lvmetad_filter used when scanning devices for lvmetad
-	 *   	- cmd->filter used when processing lvmetad responses
-	 *   	- cmd->full_filter used for remaining situations
-	 *
-	 *   - if lvmetad is not used:
-	 *   	- cmd->lvmetad_filter is NULL
-	 *   	- cmd->filter == cmd->full_filter used for all situations
-	 *
+	 * Filtering.
 	 */
-	struct dev_filter *lvmetad_filter;
-	struct dev_filter *filter;
-	struct dev_filter *full_filter;
-	int dump_filter;	/* Dump filter when exiting? */
+	struct dev_filter *lvmetad_filter;	/* pre-lvmetad filter chain */
+	struct dev_filter *filter;		/* post-lvmetad filter chain */
+	struct dev_filter *full_filter;		/* lvmetad_filter + filter */
+	int dump_filter;			/* Dump filter when exiting? */

-	struct dm_list config_files; /* master lvm config + any existing tag configs */
-	struct profile_params *profile_params; /* profile handling params including loaded profile configs */
-	struct dm_config_tree *cft; /* the whole cascade: CONFIG_STRING -> CONFIG_PROFILE -> CONFIG_FILE/CONFIG_MERGED_FILES */
-	int config_initialized; /* used to reinitialize config if previous init was not successful */
-
-	struct dm_hash_table *cft_def_hash; /* config definition hash used for validity check (item type + item recognized) */
-
-	/* selected settings with original default/configured value which can be changed during cmd processing */
-	struct config_info default_settings;
-	/* may contain changed values compared to default_settings */
-	struct config_info current_settings;
+	/*
+	 * Configuration.
+	 */
+	struct dm_list config_files; 		/* master lvm config + any existing tag configs */
+	struct profile_params *profile_params;	/* profile handling params including loaded profile configs */
+	struct dm_config_tree *cft;		/* the whole cascade: CONFIG_STRING -> CONFIG_PROFILE -> CONFIG_FILE/CONFIG_MERGED_FILES */
+	struct dm_hash_table *cft_def_hash;	/* config definition hash used for validity check (item type + item recognized) */
+	struct config_info default_settings;	/* selected settings with original default/configured value which can be changed during cmd processing */
+	struct config_info current_settings; 	/* may contain changed values compared to default_settings */

+	/*
+	 * Archives and backups.
+	 */
 	struct archive_params *archive_params;
 	struct backup_params *backup_params;
 	const char *stripe_filler;

-	/* List of defined tags */
-	struct dm_list tags;
-	const char *report_list_item_separator;
+	/*
+	 * Host tags.
+	 */
+	struct dm_list tags;			/* list of defined tags */
 	int hosttags;

-	const char *lib_dir;		/* Cache value global/library_dir */
+	/*
+	 * Paths.
+	 */
+	const char *lib_dir;			/* cache value global/library_dir */
 	char system_dir[PATH_MAX];
 	char dev_dir[PATH_MAX];
 	char proc_dir[PATH_MAX];
+
+	/*
+	 * Buffers.
+	 */
+	char display_buffer[NAME_LEN * 10];	/* ring buffer for upto 10 longest vg/lv names */
+	unsigned display_lvname_idx;		/* index to ring buffer */
+	char *linebuffer;
+
+	/*
+	 * Others - unsorted.
+	 */
+	const char *report_list_item_separator;
+	const char *time_format;
+	unsigned rand_seed;
 };

 /*
@@ -155,13 +198,17 @@ struct cmd_context {
 struct cmd_context *create_toolcontext(unsigned is_long_lived,
 				       const char *system_dir,
 				       unsigned set_buffering,
-				       unsigned threaded);
+				       unsigned threaded,
+				       unsigned set_connections,
+				       unsigned set_filters);
 void destroy_toolcontext(struct cmd_context *cmd);
 int refresh_toolcontext(struct cmd_context *cmd);
 int refresh_filters(struct cmd_context *cmd);
 int process_profilable_config(struct cmd_context *cmd);
 int config_files_changed(struct cmd_context *cmd);
 int init_lvmcache_orphans(struct cmd_context *cmd);
+int init_filters(struct cmd_context *cmd, unsigned load_persistent_cache);
+int init_connections(struct cmd_context *cmd);

 struct format_type *get_format_by_name(struct cmd_context *cmd, const char *format);

--- a/lib/config/config.c
+++ b/lib/config/config.c
@@ -23,6 +23,7 @@
 #include "toolcontext.h"
 #include "lvm-file.h"
 #include "memlock.h"
+#include "segtype.h"

 #include <sys/stat.h>
 #include <sys/mman.h>
@@ -65,11 +66,11 @@ struct config_source {
 * Map each ID to respective definition of the configuration item.
 */
 static struct cfg_def_item _cfg_def_items[CFG_COUNT + 1] = {
-#define cfg_section(id, name, parent, flags, since_version, comment) {id, parent, name, CFG_TYPE_SECTION, {0}, flags, since_version, comment},
-#define cfg(id, name, parent, flags, type, default_value, since_version, comment) {id, parent, name, type, {.v_##type = default_value}, flags, since_version, comment},
-#define cfg_runtime(id, name, parent, flags, type, since_version, comment) {id, parent, name, type, {.fn_##type = get_default_##id}, flags | CFG_DEFAULT_RUN_TIME, since_version, comment},
-#define cfg_array(id, name, parent, flags, types, default_value, since_version, comment) {id, parent, name, CFG_TYPE_ARRAY | types, {.v_CFG_TYPE_STRING = default_value}, flags, since_version, comment},
-#define cfg_array_runtime(id, name, parent, flags, types, since_version, comment) {id, parent, name, CFG_TYPE_ARRAY | types, {.fn_CFG_TYPE_STRING = get_default_##id}, flags | CFG_DEFAULT_RUN_TIME, since_version, comment},
+#define cfg_section(id, name, parent, flags, since_version, deprecated_since_version, deprecation_comment, comment) {id, parent, name, CFG_TYPE_SECTION, {0}, flags, since_version, {0}, deprecated_since_version, deprecation_comment, comment},
+#define cfg(id, name, parent, flags, type, default_value, since_version, unconfigured_value, deprecated_since_version, deprecation_comment, comment) {id, parent, name, type, {.v_##type = default_value}, flags, since_version, {.v_UNCONFIGURED = unconfigured_value}, deprecated_since_version, deprecation_comment, comment},
+#define cfg_runtime(id, name, parent, flags, type, since_version, deprecated_since_version, deprecation_comment, comment) {id, parent, name, type, {.fn_##type = get_default_##id}, flags | CFG_DEFAULT_RUN_TIME, since_version, {.fn_UNCONFIGURED = get_default_unconfigured_##id}, deprecated_since_version, deprecation_comment, comment},
+#define cfg_array(id, name, parent, flags, types, default_value, since_version, unconfigured_value, deprecated_since_version, deprecation_comment, comment) {id, parent, name, CFG_TYPE_ARRAY | types, {.v_CFG_TYPE_STRING = default_value}, flags, since_version, {.v_UNCONFIGURED = unconfigured_value}, deprecated_since_version, deprecation_comment, comment},
+#define cfg_array_runtime(id, name, parent, flags, types, since_version, deprecated_since_version, deprecation_comment, comment) {id, parent, name, CFG_TYPE_ARRAY | types, {.fn_CFG_TYPE_STRING = get_default_##id}, flags | CFG_DEFAULT_RUN_TIME, since_version, {.fn_UNCONFIGURED = get_default_unconfigured_##id}, deprecated_since_version, deprecation_comment, comment},
 #include "config_settings.h"
 #undef cfg_section
 #undef cfg
@@ -604,6 +605,7 @@ struct timespec config_file_timestamp(struct dm_config_tree *cft)
 }

 #define cfg_def_get_item_p(id) (&_cfg_def_items[id])
+#define cfg_def_get_default_unconfigured_value_hint(cmd,item) ((item->flags & CFG_DEFAULT_RUN_TIME) ? item->default_unconfigured_value.fn_UNCONFIGURED(cmd) : item->default_unconfigured_value.v_UNCONFIGURED)
 #define cfg_def_get_default_value_hint(cmd,item,type,profile) ((item->flags & CFG_DEFAULT_RUN_TIME) ? item->default_value.fn_##type(cmd,profile) : item->default_value.v_##type)
 #define cfg_def_get_default_value(cmd,item,type,profile) (item->flags & CFG_DEFAULT_UNDEFINED ? 0 : cfg_def_get_default_value_hint(cmd,item,type,profile))

@@ -665,22 +667,28 @@ static void _log_type_error(const char *path, cfg_def_type_t actual,
 					     actual_type_name, expected_type_name);
 }

-static struct dm_config_value *_get_def_array_values(struct dm_config_tree *cft,
-						     const cfg_def_item_t *def)
+static struct dm_config_value *_get_def_array_values(struct cmd_context *cmd,
+						     struct dm_config_tree *cft,
+						     const cfg_def_item_t *def,
+						     uint32_t format_flags)
 {
+	const char *def_enc_value;
 	char *enc_value, *token, *p, *r;
 	struct dm_config_value *array = NULL, *v = NULL, *oldv = NULL;

-	if (!def->default_value.v_CFG_TYPE_STRING) {
+	def_enc_value = cfg_def_get_default_value(cmd, def, CFG_TYPE_ARRAY, NULL);
+
+	if (!def_enc_value) {
 		if (!(array = dm_config_create_value(cft))) {
 			log_error("Failed to create default empty array for %s.", def->name);
 			return NULL;
 		}
 		array->type = DM_CFG_EMPTY_ARRAY;
+		dm_config_value_set_format_flags(array, format_flags);
 		return array;
 	}

-	if (!(p = token = enc_value = dm_strdup(def->default_value.v_CFG_TYPE_STRING))) {
+	if (!(p = token = enc_value = dm_strdup(def_enc_value))) {
 		log_error("_get_def_array_values: dm_strdup failed");
 		return NULL;
 	}
@@ -709,6 +717,9 @@ static struct dm_config_value *_get_def_array_values(struct dm_config_tree *cft,
 			dm_free(enc_value);
 			return NULL;
 		}
+
+		dm_config_value_set_format_flags(v, format_flags);
+
 		if (oldv)
 			oldv->next = v;
 		if (!array)
@@ -825,6 +836,12 @@ static int _check_value_differs_from_default(struct cft_check_handle *handle,
 	float f;
 	const char *str;

+	if ((handle->ignoreunsupported && (def->flags & CFG_UNSUPPORTED)) ||
+	    (handle->ignoreadvanced && (def->flags & CFG_ADVANCED))) {
+		diff = 0;
+		goto out;
+	}
+
 	/* if default value is undefined, the value used differs from default */
 	if (def->flags & CFG_DEFAULT_UNDEFINED) {
 		diff = 1;
@@ -832,7 +849,7 @@ static int _check_value_differs_from_default(struct cft_check_handle *handle,
 	}

 	if (!v_def && (def->type & CFG_TYPE_ARRAY)) {
-		if (!(v_def_array = v_def_iter = _get_def_array_values(handle->cft, def)))
+		if (!(v_def_array = v_def_iter = _get_def_array_values(handle->cmd, handle->cft, def, 0)))
 			return_0;
 		do {
 			/* iterate over each element of the array and check its value */
@@ -1024,9 +1041,14 @@ static int _config_def_check_tree(struct cft_check_handle *handle,
 				  size_t buf_size, struct dm_config_node *root)
 {
 	struct dm_config_node *cn;
+	cfg_def_item_t *def;
 	int valid, r = 1;
 	size_t len;

+	def = cfg_def_get_item_p(root->id);
+	if (def->flags & CFG_SECTION_NO_CHECK)
+		return 1;
+
 	for (cn = root->child; cn; cn = cn->sib) {
 		if ((valid = _config_def_check_node(handle, vp, pvp, rp, prp,
 						    buf_size, cn)) && !cn->v) {
@@ -1348,6 +1370,106 @@ int find_config_tree_bool(struct cmd_context *cmd, int id, struct profile *profi
 	return b;
 }

+static struct dm_config_node *_get_array_def_node(struct cmd_context *cmd,
+						  cfg_def_item_t *def,
+						  struct profile *profile)
+{
+	struct dm_config_node *cn;
+
+	if (def->flags & CFG_DEFAULT_UNDEFINED)
+		return NULL;
+
+	if (!(cn = dm_config_create_node(cmd->cft, def->name))) {
+		log_error("Failed to create default array node for %s.", def->name);
+		return NULL;
+	}
+
+	if (!(cn->v = _get_def_array_values(cmd, cmd->cft, def, 0))) {
+		dm_pool_free(cmd->cft->mem, cn);
+		return_NULL;
+	}
+
+	return cn;
+}
+
+struct _config_array_out_handle {
+	struct dm_pool *mem;
+	char *str;
+};
+
+static int _config_array_line(const struct dm_config_node *cn, const char *line, void *baton)
+{
+	struct _config_array_out_handle *handle = (struct _config_array_out_handle *) baton;
+
+	if (!(handle->str = dm_pool_strdup(handle->mem, line))) {
+		log_error("_config_array_line: dm_pool_strdup failed");
+		return 0;
+	}
+
+	return 1;
+}
+
+static void _log_array_value_used(struct dm_pool *mem, const struct dm_config_node *cn,
+				  const char *path, int default_used)
+{
+	struct _config_array_out_handle out_handle = { 0 };
+	struct dm_config_node_out_spec out_spec = { 0 };
+	uint32_t old_format_flags;
+
+	out_handle.mem = mem;
+	out_spec.line_fn = _config_array_line;
+
+	old_format_flags = dm_config_value_get_format_flags(cn->v);
+	dm_config_value_set_format_flags(cn->v,
+		DM_CONFIG_VALUE_FMT_COMMON_EXTRA_SPACES |
+		DM_CONFIG_VALUE_FMT_COMMON_ARRAY);
+
+	if (!dm_config_write_one_node_out(cn, &out_spec, &out_handle)) {
+		log_error("_log_array_value_used: failed to write node value");
+		out_handle.mem = NULL;
+	}
+
+	if (default_used)
+		log_very_verbose("%s not found in config: defaulting to %s",
+				 path, out_handle.mem ? out_handle.str : "<unknown>");
+	else
+		log_very_verbose("Setting %s to %s",
+				 path, out_handle.mem ? out_handle.str : "<unknown>");
+
+	if (out_handle.mem)
+		dm_pool_free(out_handle.mem, out_handle.str);
+	dm_config_value_set_format_flags(cn->v, old_format_flags);
+}
+
+const struct dm_config_node *find_config_tree_array(struct cmd_context *cmd, int id, struct profile *profile)
+{
+	cfg_def_item_t *item = cfg_def_get_item_p(id);
+	char path[CFG_PATH_MAX_LEN];
+	int profile_applied;
+	const struct dm_config_node *cn = NULL, *cn_def = NULL;
+	profile_applied = _apply_local_profile(cmd, profile);
+	_cfg_def_make_path(path, sizeof(path), item->id, item, 0);
+
+	if (!(item->type & CFG_TYPE_ARRAY))
+		log_error(INTERNAL_ERROR "%s cfg tree element not declared as array.", path);
+
+	if (_config_disabled(cmd, item, path) ||
+	    !(cn = find_config_tree_node(cmd, id, profile)))
+		cn_def = _get_array_def_node(cmd, item, profile);
+
+	if (cn)
+		_log_array_value_used(cmd->cft->mem, cn, path, 0);
+	else if (cn_def) {
+		_log_array_value_used(cmd->cft->mem, cn_def, path, 1);
+		cn = cn_def;
+	}
+
+	if (profile_applied)
+		remove_config_tree_by_source(cmd, profile->source);
+
+	return cn;
+}
+
 /* Insert cn2 after cn1 */
 static void _insert_config_node(struct dm_config_node **cn1,
 				struct dm_config_node *cn2)
@@ -1488,6 +1610,55 @@ struct out_baton {
 	struct dm_pool *mem;
 };

+#define MAX_COMMENT_LINE 512
+
+static int _copy_one_line(const char *comment, char *line, int *pos, int len)
+{
+	int p;
+	int i = 0;
+	char c;
+
+	if (*pos >= len)
+		return 0;
+
+	memset(line, 0, MAX_COMMENT_LINE+1);
+
+	for (p = *pos; ; p++) {
+		c = comment[p];
+
+		(*pos)++;
+
+		if (c == '\n' || c == '\0')
+			break;
+
+		line[i++] = c;
+
+		if (i == MAX_COMMENT_LINE)
+		       break;
+	}
+
+	return i;
+}
+
+static int _get_config_node_version(uint16_t version_enc, char *version)
+{
+	if (dm_snprintf(version, 9, "%u.%u.%u",
+			(version_enc & 0xE000) >> 13,
+			(version_enc & 0x1E00) >> 9,
+			(version_enc & 0x1FF)) == -1) {
+		log_error("_get_config_node_version: couldn't create version string");
+		return 0;
+	}
+
+	return 1;
+}
+
+static int _def_node_is_deprecated(cfg_def_item_t *def, struct config_def_tree_spec *spec)
+{
+	return def->deprecated_since_version &&
+	       (spec->version >= def->deprecated_since_version);
+}
+
 static int _out_prefix_fn(const struct dm_config_node *cn, const char *line, void *baton)
 {
 	struct out_baton *out = baton;
@@ -1495,15 +1666,13 @@ static int _out_prefix_fn(const struct dm_config_node *cn, const char *line, voi
 	char version[9]; /* 8+1 chars for max version of 7.15.511 */
 	const char *node_type_name = cn->v ? "option" : "section";
 	char path[CFG_PATH_MAX_LEN];
+	char commentline[MAX_COMMENT_LINE+1];

-
-	if (cn->id < 0)
+	if (cn->id <= 0)
 		return 1;

-	if (!cn->id) {
-		log_error(INTERNAL_ERROR "Configuration node %s has invalid id.", cn->key);
-		return 0;
-	}
+	if (out->tree_spec->type == CFG_DEF_TREE_LIST)
+		return 1;

 	if ((out->tree_spec->type == CFG_DEF_TREE_DIFF) &&
 	    (!(out->tree_spec->check_status[cn->id] & CFG_DIFF)))
@@ -1511,12 +1680,27 @@ static int _out_prefix_fn(const struct dm_config_node *cn, const char *line, voi

 	cfg_def = cfg_def_get_item_p(cn->id);

-	if (out->tree_spec->withcomments) {
+	if (out->tree_spec->withsummary || out->tree_spec->withcomments) {
 		_cfg_def_make_path(path, sizeof(path), cfg_def->id, cfg_def, 1);
+		fprintf(out->fp, "\n");
 		fprintf(out->fp, "%s# Configuration %s %s.\n", line, node_type_name, path);

-		if (cfg_def->comment)
-			fprintf(out->fp, "%s# %s\n", line, cfg_def->comment);
+		if (out->tree_spec->withcomments &&
+		    _def_node_is_deprecated(cfg_def, out->tree_spec))
+			fprintf(out->fp, "%s# %s", line, cfg_def->deprecation_comment);
+
+		if (cfg_def->comment) {
+			int pos = 0;
+			while (_copy_one_line(cfg_def->comment, commentline, &pos, strlen(cfg_def->comment))) {
+				fprintf(out->fp, "%s# %s\n", line, commentline);
+				/* withsummary prints only the first comment line. */
+				if (!out->tree_spec->withcomments)
+					break;
+			}
+		}
+
+		if (_def_node_is_deprecated(cfg_def, out->tree_spec))
+			fprintf(out->fp, "%s# This configuration %s is deprecated.\n", line, node_type_name);

 		if (cfg_def->flags & CFG_ADVANCED)
 			fprintf(out->fp, "%s# This configuration %s is advanced.\n", line, node_type_name);
@@ -1529,34 +1713,98 @@ static int _out_prefix_fn(const struct dm_config_node *cn, const char *line, voi

 		if (cfg_def->flags & CFG_DEFAULT_UNDEFINED)
 			fprintf(out->fp, "%s# This configuration %s does not have a default value defined.\n", line, node_type_name);
+
+		if ((out->tree_spec->type == CFG_DEF_TREE_FULL) &&
+		    (out->tree_spec->check_status[cn->id] & CFG_USED))
+			fprintf(out->fp, "%s# Value defined in existing configuration has been used for this setting.\n", line);
 	}

 	if (out->tree_spec->withversions) {
-		if (dm_snprintf(version, 9, "%u.%u.%u",
-				(cfg_def->since_version & 0xE000) >> 13,
-				(cfg_def->since_version & 0x1E00) >> 9,
-				(cfg_def->since_version & 0x1FF)) == -1) {
-			log_error("_out_prefix_fn: couldn't create version string");
-			return 0;
+		if (!_get_config_node_version(cfg_def->since_version, version))
+			return_0;
+		fprintf(out->fp, "%s# Available since version %s.\n", line, version);
+
+		if (_def_node_is_deprecated(cfg_def, out->tree_spec)) {
+			if (!_get_config_node_version(cfg_def->deprecated_since_version, version))
+				return_0;
+			fprintf(out->fp, "%s# Deprecated since version %s.\n", line, version);
 		}
-		fprintf(out->fp, "%s# Since version %s.\n", line, version);
 	}

 	return 1;
 }

+static int _should_print_cfg_with_undef_def_val(struct out_baton *out, cfg_def_item_t *cfg_def,
+						const struct dm_config_node *cn)
+{
+	if (!(cfg_def->flags & CFG_DEFAULT_UNDEFINED))
+		return 1;
+
+	/* print it only if the value is directly defined in some config = it's used */
+	return out->tree_spec->check_status && (out->tree_spec->check_status[cn->id] & CFG_USED);
+}
+
 static int _out_line_fn(const struct dm_config_node *cn, const char *line, void *baton)
 {
 	struct out_baton *out = baton;
-	struct cfg_def_item *cfg_def = cfg_def_get_item_p(cn->id);
+	struct cfg_def_item *cfg_def;
+	char config_path[CFG_PATH_MAX_LEN];
+	char summary[MAX_COMMENT_LINE+1];
+	char version[9];
+	int pos = 0;
+	size_t len;
+	char *space_prefix;

 	if ((out->tree_spec->type == CFG_DEF_TREE_DIFF) &&
 	    (!(out->tree_spec->check_status[cn->id] & CFG_DIFF)))
 		return 1;

-	fprintf(out->fp, "%s%s\n", (out->tree_spec->type != CFG_DEF_TREE_CURRENT) &&
-				   (out->tree_spec->type != CFG_DEF_TREE_DIFF) &&
-				   (cfg_def->flags & CFG_DEFAULT_UNDEFINED) ? "#" : "", line);
+	cfg_def = cfg_def_get_item_p(cn->id);
+
+	if (out->tree_spec->type == CFG_DEF_TREE_LIST) {
+		/* List view with node paths and summary. */
+		if (cfg_def->type & CFG_TYPE_SECTION)
+			return 1;
+		if (!_cfg_def_make_path(config_path, CFG_PATH_MAX_LEN, cfg_def->id, cfg_def, 1))
+			return_0;
+		if (out->tree_spec->withversions && !_get_config_node_version(cfg_def->since_version, version))
+			return_0;
+
+		summary[0] = '\0';
+		if (out->tree_spec->withsummary && cfg_def->comment)
+			_copy_one_line(cfg_def->comment, summary, &pos, strlen(cfg_def->comment));
+
+		fprintf(out->fp, "%s%s%s%s%s%s%s\n", config_path,
+			*summary || out->tree_spec->withversions ? " - ": "",
+			*summary ? summary : "",
+			*summary ? " " : "",
+			out->tree_spec->withversions ? "[" : "",
+			out->tree_spec->withversions ? version : "",
+			out->tree_spec->withversions ? "]" : "");
+
+		return 1;
+	}
+
+	/* Usual tree view with nodes and their values. */
+
+	if ((out->tree_spec->type != CFG_DEF_TREE_CURRENT) &&
+	    (out->tree_spec->type != CFG_DEF_TREE_DIFF) &&
+	    (out->tree_spec->type != CFG_DEF_TREE_FULL) &&
+	    (cfg_def->flags & (CFG_DEFAULT_UNDEFINED | CFG_DEFAULT_COMMENTED))) {
+		/* print with # at the front to comment out the line */
+		if (_should_print_cfg_with_undef_def_val(out, cfg_def, cn)) {
+			space_prefix = ((len = strspn(line, "\t "))) ? dm_pool_strndup(out->mem, line, len) : NULL;
+			fprintf(out->fp, "%s%s%s\n", space_prefix ? : "", "# ", line + len);
+			if (space_prefix)
+				dm_pool_free(out->mem, space_prefix);
+		}
+		return 1;
+	}
+
+	/* print the line as it is */
+	if (_should_print_cfg_with_undef_def_val(out, cfg_def, cn))
+		fprintf(out->fp, "%s\n", line);
+
 	return 1;
 }

@@ -1624,20 +1872,31 @@ static struct dm_config_node *_add_def_node(struct dm_config_tree *cft,
 {
 	struct dm_config_node *cn;
 	const char *str;
+	uint32_t format_flags = 0;

 	if (!(cn = dm_config_create_node(cft, def->name))) {
 		log_error("Failed to create default config setting node.");
 		return NULL;
 	}

-	if (!(def->type & CFG_TYPE_SECTION) && (!(cn->v = dm_config_create_value(cft)))) {
-		log_error("Failed to create default config setting node value.");
-		return NULL;
+	if (!(def->type & CFG_TYPE_SECTION) && !(def->type & CFG_TYPE_ARRAY)) {
+		if (!(cn->v = dm_config_create_value(cft))) {
+			log_error("Failed to create default config setting node value.");
+			return NULL;
+		}
+		if (spec->withspaces)
+			format_flags |= DM_CONFIG_VALUE_FMT_COMMON_EXTRA_SPACES;
 	}

 	cn->id = def->id;

-	if (!(def->type & CFG_TYPE_ARRAY)) {
+	if (spec->unconfigured && def->default_unconfigured_value.v_UNCONFIGURED) {
+		cn->v->type = DM_CFG_STRING;
+		cn->v->v.str = cfg_def_get_default_unconfigured_value_hint(spec->cmd, def);
+		if (def->type != CFG_TYPE_STRING)
+			format_flags |= DM_CONFIG_VALUE_FMT_STRING_NO_QUOTES;
+		dm_config_value_set_format_flags(cn->v, format_flags);
+	} else if (!(def->type & CFG_TYPE_ARRAY)) {
 		switch (def->type) {
 			case CFG_TYPE_SECTION:
 				cn->v = NULL;
@@ -1649,6 +1908,8 @@ static struct dm_config_node *_add_def_node(struct dm_config_tree *cft,
 			case CFG_TYPE_INT:
 				cn->v->type = DM_CFG_INT;
 				cn->v->v.i = cfg_def_get_default_value_hint(spec->cmd, def, CFG_TYPE_INT, NULL);
+				if (def->flags & CFG_FORMAT_INT_OCTAL)
+					format_flags |= DM_CONFIG_VALUE_FMT_INT_OCTAL;
 				break;
 			case CFG_TYPE_FLOAT:
 				cn->v->type = DM_CFG_FLOAT;
@@ -1665,8 +1926,13 @@ static struct dm_config_node *_add_def_node(struct dm_config_tree *cft,
 				return NULL;
 				break;
 		}
-	} else
-		cn->v = _get_def_array_values(cft, def);
+		dm_config_value_set_format_flags(cn->v, format_flags);
+	} else {
+		if (spec->withspaces)
+			format_flags |= DM_CONFIG_VALUE_FMT_COMMON_EXTRA_SPACES;
+		format_flags |= DM_CONFIG_VALUE_FMT_COMMON_ARRAY;
+		cn->v = _get_def_array_values(spec->cmd, cft, def, format_flags);
+	}

 	cn->child = NULL;
 	if (parent) {
@@ -1682,6 +1948,11 @@ static struct dm_config_node *_add_def_node(struct dm_config_tree *cft,
 	return cn;
 }

+static int _should_skip_deprecated_def_node(cfg_def_item_t *def, struct config_def_tree_spec *spec)
+{
+	return spec->ignoredeprecated && _def_node_is_deprecated(def, spec);
+}
+
 static int _should_skip_def_node(struct config_def_tree_spec *spec, int section_id, int id)
 {
 	cfg_def_item_t *def = cfg_def_get_item_p(id);
@@ -1693,6 +1964,8 @@ static int _should_skip_def_node(struct config_def_tree_spec *spec, int section_
 		return 1;

 	switch (spec->type) {
+		case CFG_DEF_TREE_FULL:
+			/* fall through */
 		case CFG_DEF_TREE_MISSING:
 			if (!spec->check_status) {
 				log_error_once(INTERNAL_ERROR "couldn't determine missing "
@@ -1700,19 +1973,27 @@ static int _should_skip_def_node(struct config_def_tree_spec *spec, int section_
 				return 1;
 			}
 			if ((spec->check_status[id] & CFG_USED) ||
-			    (def->flags & CFG_NAME_VARIABLE) ||
-			    (def->since_version > spec->version))
+			    (def->flags & CFG_NAME_VARIABLE))
+				return 1;
+
+			if ((spec->type == CFG_DEF_TREE_MISSING) &&
+			    ((def->since_version > spec->version) ||
+			     _should_skip_deprecated_def_node(def, spec)))
 				return 1;
 			break;
 		case CFG_DEF_TREE_NEW:
-			if (def->since_version != spec->version)
+			if ((def->since_version != spec->version) ||
+			    _should_skip_deprecated_def_node(def, spec))
 				return 1;
 			break;
 		case CFG_DEF_TREE_PROFILABLE:
+			/* fall through */
 		case CFG_DEF_TREE_PROFILABLE_CMD:
+			/* fall through */
 		case CFG_DEF_TREE_PROFILABLE_MDA:
 			if (!(def->flags & CFG_PROFILABLE) ||
-			    (def->since_version > spec->version))
+			    (def->since_version > spec->version) ||
+			    _should_skip_deprecated_def_node(def, spec))
 				return 1;
 			flags = def->flags & ~CFG_PROFILABLE;
 			if (spec->type == CFG_DEF_TREE_PROFILABLE_CMD) {
@@ -1724,7 +2005,8 @@ static int _should_skip_def_node(struct config_def_tree_spec *spec, int section_
 			}
 			break;
 		default:
-			if (def->since_version > spec->version)
+			if ((def->since_version > spec->version) ||
+			    _should_skip_deprecated_def_node(def, spec))
 				return 1;
 			break;
 	}
@@ -1763,7 +2045,7 @@ bad:

 struct dm_config_tree *config_def_create_tree(struct config_def_tree_spec *spec)
 {
-	struct dm_config_tree *cft;
+	struct dm_config_tree *cft = NULL, *tmp_cft = NULL;
 	struct dm_config_node *root = NULL, *relay = NULL, *tmp;
 	int id;

@@ -1776,6 +2058,9 @@ struct dm_config_tree *config_def_create_tree(struct config_def_tree_spec *spec)
 		if (cfg_def_get_item_p(id)->parent != root_CFG_SECTION)
 			continue;

+		if (spec->ignorelocal && (id == local_CFG_SECTION))
+			continue;
+
 		if ((tmp = _add_def_section_subtree(cft, spec, root, relay, id))) {
 			relay = tmp;
 			if (!root)
@@ -1784,7 +2069,33 @@ struct dm_config_tree *config_def_create_tree(struct config_def_tree_spec *spec)
 	}

 	cft->root = root;
+
+	if (spec->type == CFG_DEF_TREE_FULL) {
+		if (!(tmp_cft = dm_config_create())) {
+			log_error("Failed to create temporary config tree while creating full tree.");
+			goto bad;
+		}
+
+		if (!(tmp_cft->root = dm_config_clone_node_with_mem(cft->mem, spec->current_cft->root, 1))) {
+			log_error("Failed to clone current config tree.");
+			goto bad;
+		}
+
+		if (!merge_config_tree(spec->cmd, cft, tmp_cft, CONFIG_MERGE_TYPE_RAW)) {
+			log_error("Failed to merge default and current config tree.");
+			goto bad;
+		}
+
+		dm_config_destroy(tmp_cft);
+	}
+
 	return cft;
+bad:
+	if (cft)
+		dm_config_destroy(cft);
+	if (tmp_cft)
+		dm_config_destroy(tmp_cft);
+	return NULL;
 }

 static int _check_profile(struct cmd_context *cmd, struct profile *profile)
@@ -1963,6 +2274,11 @@ const char *get_default_devices_cache_dir_CFG(struct cmd_context *cmd, struct pr
 	return dm_pool_strdup(cmd->mem, buf);
 }

+const char *get_default_unconfigured_devices_cache_dir_CFG(struct cmd_context *cmd)
+{
+	return "@DEFAULT_SYS_DIR@/@DEFAULT_CACHE_SUBDIR@";
+}
+
 const char *get_default_devices_cache_CFG(struct cmd_context *cmd, struct profile *profile)
 {
 	const char *cache_dir = NULL, *cache_file_prefix = NULL;
@@ -1997,6 +2313,24 @@ const char *get_default_devices_cache_CFG(struct cmd_context *cmd, struct profil
 	return dm_pool_strdup(cmd->mem, buf);
 }

+const char *get_default_unconfigured_devices_cache_CFG(struct cmd_context *cmd)
+{
+	const char *cache_file_prefix = NULL;
+	static char buf[PATH_MAX];
+
+	if (find_config_tree_node(cmd, devices_cache_file_prefix_CFG, NULL))
+		cache_file_prefix = find_config_tree_str_allow_empty(cmd, devices_cache_file_prefix_CFG, NULL);
+
+	if (dm_snprintf(buf, sizeof(buf), "%s/%s.cache",
+			get_default_unconfigured_devices_cache_dir_CFG(cmd),
+			cache_file_prefix ? : DEFAULT_CACHE_FILE_PREFIX) < 0) {
+		log_error("Persistent cache filename too long.");
+		return NULL;
+	}
+
+	return dm_pool_strdup(cmd->mem, buf);
+}
+
 const char *get_default_backup_backup_dir_CFG(struct cmd_context *cmd, struct profile *profile)
 {
 	static char buf[PATH_MAX];
@@ -2010,6 +2344,11 @@ const char *get_default_backup_backup_dir_CFG(struct cmd_context *cmd, struct pr
 	return dm_pool_strdup(cmd->mem, buf);
 }

+const char *get_default_unconfigured_backup_backup_dir_CFG(struct cmd_context *cmd)
+{
+	return "@DEFAULT_SYS_DIR@/@DEFAULT_BACKUP_SUBDIR@";
+}
+
 const char *get_default_backup_archive_dir_CFG(struct cmd_context *cmd, struct profile *profile)
 {
 	static char buf[PATH_MAX];
@@ -2023,6 +2362,11 @@ const char *get_default_backup_archive_dir_CFG(struct cmd_context *cmd, struct p
 	return dm_pool_strdup(cmd->mem, buf);
 }

+const char *get_default_unconfigured_backup_archive_dir_CFG(struct cmd_context *cmd)
+{
+	return "@DEFAULT_SYS_DIR@/@DEFAULT_ARCHIVE_SUBDIR@";
+}
+
 const char *get_default_config_profile_dir_CFG(struct cmd_context *cmd, struct profile *profile)
 {
 	static char buf[PATH_MAX];
@@ -2036,6 +2380,11 @@ const char *get_default_config_profile_dir_CFG(struct cmd_context *cmd, struct p
 	return dm_pool_strdup(cmd->mem, buf);
 }

+const char *get_default_unconfigured_config_profile_dir_CFG(struct cmd_context *cmd)
+{
+	return "@DEFAULT_SYS_DIR@/@DEFAULT_PROFILE_SUBDIR@";
+}
+
 const char *get_default_activation_mirror_image_fault_policy_CFG(struct cmd_context *cmd, struct profile *profile)
 {
 	return find_config_tree_str(cmd, activation_mirror_device_fault_policy_CFG, profile);
@@ -2067,3 +2416,27 @@ int get_default_allocation_cache_pool_chunk_size_CFG(struct cmd_context *cmd, st
 {
 	return DEFAULT_CACHE_POOL_CHUNK_SIZE * 2;
 }
+
+const char *get_default_allocation_cache_policy_CFG(struct cmd_context *cmd, struct profile *profile)
+{
+	const struct segment_type *segtype = get_segtype_from_string(cmd, "cache");
+	unsigned attr = ~0;
+
+	if (!segtype ||
+	    !segtype->ops->target_present ||
+	    !segtype->ops->target_present(cmd, NULL, &attr)) {
+		log_warn("WARNING: Cannot detect default cache policy, using \""
+			 DEFAULT_CACHE_POLICY "\".");
+		return DEFAULT_CACHE_POLICY;
+	}
+
+	if (attr & CACHE_FEATURE_POLICY_SMQ)
+		return "smq";
+
+	if (attr & CACHE_FEATURE_POLICY_MQ)
+		return "mq";
+
+	log_warn("WARNING: Default cache policy not available.");
+
+	return NULL;
+}
--- a/lib/config/config.h
+++ b/lib/config/config.h
@@ -50,7 +50,7 @@ struct profile_params {
 	struct dm_list profiles;                 /* list of profiles which are loaded already and which are ready for use */
 };

-#define CFG_PATH_MAX_LEN 64
+#define CFG_PATH_MAX_LEN 128

 /*
 * Structures used for definition of a configuration tree.
@@ -72,6 +72,7 @@ typedef int (*t_fn_CFG_TYPE_INT) (struct cmd_context *cmd, struct profile *profi
 typedef float (*t_fn_CFG_TYPE_FLOAT) (struct cmd_context *cmd, struct profile *profile);
 typedef const char* (*t_fn_CFG_TYPE_STRING) (struct cmd_context *cmd, struct profile *profile);
 typedef const char* (*t_fn_CFG_TYPE_ARRAY) (struct cmd_context *cmd, struct profile *profile);
+typedef const char* (*t_fn_UNCONFIGURED) (struct cmd_context *cmd);

 /* configuration definition item value (for item's default value) */
 typedef union {
@@ -88,6 +89,11 @@ typedef union {
 	t_fn_CFG_TYPE_ARRAY fn_CFG_TYPE_ARRAY;
 } cfg_def_value_t;

+typedef union {
+	const char *v_UNCONFIGURED;
+	t_fn_UNCONFIGURED fn_UNCONFIGURED;
+} cfg_def_unconfigured_value_t;
+
 /* configuration definition item flags: */


@@ -107,46 +113,62 @@ typedef union {
 #define CFG_PROFILABLE_METADATA 0x030
 /* whether the default value is undefned */
 #define CFG_DEFAULT_UNDEFINED	0x040
+/* whether the default value is commented out on output */
+#define CFG_DEFAULT_COMMENTED	0x080
 /* whether the default value is calculated during run time */
-#define CFG_DEFAULT_RUN_TIME	0x080
+#define CFG_DEFAULT_RUN_TIME	0x100
 /* whether the configuration setting is disabled (and hence defaults always used) */
-#define CFG_DISABLED		0x100
+#define CFG_DISABLED		0x200
+/* whether to print integers in octal form (prefixed by "0") */
+#define CFG_FORMAT_INT_OCTAL	0x400
+/* whether to disable checks for the whole config section subtree */
+#define CFG_SECTION_NO_CHECK	0x800

 /* configuration definition item structure */
 typedef struct cfg_def_item {
-	int id;				/* ID of this item */
-	int parent;			/* ID of parent item */
-	const char *name;		/* name of the item in configuration tree */
-	int type;			/* configuration item type (bits of cfg_def_type_t) */
-	cfg_def_value_t default_value;	/* default value (only for settings) */
-	uint16_t flags;			/* configuration item definition flags */
-	uint16_t since_version;		/* version this item appeared in */
-	const char *comment;		/* brief comment */
+	int id;								/* ID of this item */
+	int parent;							/* ID of parent item */
+	const char *name;						/* name of the item in configuration tree */
+	int type;							/* configuration item type (bits of cfg_def_type_t) */
+	cfg_def_value_t default_value;					/* default value (only for settings) */
+	uint16_t flags;							/* configuration item definition flags */
+	uint16_t since_version;						/* version this item appeared in */
+	cfg_def_unconfigured_value_t default_unconfigured_value;	/* default value in terms of @FOO@, pre-configured (only for settings) */
+	uint16_t deprecated_since_version;				/* version since which this item is deprecated */
+	const char *deprecation_comment;				/* comment about reasons for deprecation and settings that supersede this one */
+	const char *comment;						/* comment */
 } cfg_def_item_t;

 /* configuration definition tree types */
 typedef enum {
 	CFG_DEF_TREE_CURRENT,		/* tree of nodes with values currently set in the config */
 	CFG_DEF_TREE_MISSING,		/* tree of nodes missing in current config using default values */
-	CFG_DEF_TREE_COMPLETE,		/* CURRENT + MISSING, the tree actually used within execution, not implemented yet */
+	CFG_DEF_TREE_FULL,		/* CURRENT + MISSING, the tree actually used within execution */
 	CFG_DEF_TREE_DEFAULT,		/* tree of all possible config nodes with default values */
 	CFG_DEF_TREE_NEW,		/* tree of all new nodes that appeared in given version */
 	CFG_DEF_TREE_PROFILABLE,	/* tree of all nodes that are customizable by profiles */
 	CFG_DEF_TREE_PROFILABLE_CMD,	/* tree of all nodes that are customizable by command profiles (subset of PROFILABLE) */
 	CFG_DEF_TREE_PROFILABLE_MDA,	/* tree of all nodes that are customizable by metadata profiles (subset of PROFILABLE) */
 	CFG_DEF_TREE_DIFF,		/* tree of all nodes that differ from defaults */
+	CFG_DEF_TREE_LIST,		/* list all nodes */
 } cfg_def_tree_t;

 /* configuration definition tree specification */
 struct config_def_tree_spec {
-	struct cmd_context *cmd;	/* command context (for run-time defaults */
-	cfg_def_tree_t type;		/* tree type */
-	uint16_t version;		/* tree at this LVM2 version */
+	struct cmd_context *cmd;		/* command context (for run-time defaults) */
+	struct dm_config_tree *current_cft;	/* current config tree which is defined explicitly - defaults are not used */
+	cfg_def_tree_t type;			/* tree type */
+	uint16_t version;			/* tree at this LVM2 version */
 	unsigned ignoreadvanced:1;		/* do not include advanced configs */
-	unsigned ignoreunsupported:1;	/* do not include unsupported configs */
-	unsigned withcomments:1;		/* include comments */
+	unsigned ignoreunsupported:1;		/* do not include unsupported configs */
+	unsigned ignoredeprecated:1;		/* do not include deprecated configs */
+	unsigned ignorelocal:1;			/* do not include the local section */
+	unsigned withsummary:1;			/* include first line of comments - a summary */
+	unsigned withcomments:1;		/* include all comment lines */
 	unsigned withversions:1;		/* include versions */
-	uint8_t *check_status;		/* status of last tree check (currently needed for CFG_DEF_TREE_MISSING only) */
+	unsigned withspaces:1;			/* add more spaces in output for better readability */
+	unsigned unconfigured:1;		/* use unconfigured path strings */
+	uint8_t *check_status;			/* status of last tree check (needed for CFG_DEF_TREE_MISSING, CFG_DEF_TREE_FULL and CFG_DEF_TREE_DIFF) */
 };


@@ -161,11 +183,11 @@ struct config_def_tree_spec {
 * Register ID for each possible item in the configuration tree.
 */
 enum {
-#define cfg_section(id, name, parent, flags, since_version, comment) id,
-#define cfg(id, name, parent, flags, type, default_value, since_version, comment) id,
-#define cfg_runtime(id, name, parent, flags, type, since_version, comment) id,
-#define cfg_array(id, name, parent, flags, types, default_value, since_version, comment) id,
-#define cfg_array_runtime(id, name, parent, flags, types, since_version, comment) id,
+#define cfg_section(id, name, parent, flags, since_version, deprecated_since_version, deprecation_comment, comment) id,
+#define cfg(id, name, parent, flags, type, default_value, since_version, unconfigured_value, deprecated_since_version, deprecation_comment, comment) id,
+#define cfg_runtime(id, name, parent, flags, type, since_version, deprecated_since_version, deprecation_comment, comment) id,
+#define cfg_array(id, name, parent, flags, types, default_value, since_version, unconfigured_value, deprecated_since_version, deprecation_comment, comment) id,
+#define cfg_array_runtime(id, name, parent, flags, types, since_version, deprecated_since_version, deprecation_comment, comment) id,
 #include "config_settings.h"
 #undef cfg_section
 #undef cfg
@@ -187,6 +209,8 @@ struct cft_check_handle {
 	unsigned skip_if_checked:1;	/* skip the check if already done before - return last state */
 	unsigned suppress_messages:1;	/* suppress messages during the check if config item is found invalid */
 	unsigned check_diff:1;		/* check if the value used differs from default one */
+	unsigned ignoreadvanced:1;	/* do not include advanced configs */
+	unsigned ignoreunsupported:1;	/* do not include unsupported configs */
 	uint8_t status[CFG_COUNT];	/* flags for each configuration item - the result of the check */
 };

@@ -250,18 +274,29 @@ int find_config_tree_int(struct cmd_context *cmd, int id, struct profile *profil
 int64_t find_config_tree_int64(struct cmd_context *cmd, int id, struct profile *profile);
 float find_config_tree_float(struct cmd_context *cmd, int id, struct profile *profile);
 int find_config_tree_bool(struct cmd_context *cmd, int id, struct profile *profile);
+const struct dm_config_node *find_config_tree_array(struct cmd_context *cmd, int id, struct profile *profile);

 /*
 * Functions for configuration settings for which the default
 * value is evaluated at runtime based on command context.
 */
 const char *get_default_devices_cache_dir_CFG(struct cmd_context *cmd, struct profile *profile);
+const char *get_default_unconfigured_devices_cache_dir_CFG(struct cmd_context *cmd);
 const char *get_default_devices_cache_CFG(struct cmd_context *cmd, struct profile *profile);
+const char *get_default_unconfigured_devices_cache_CFG(struct cmd_context *cmd);
 const char *get_default_backup_backup_dir_CFG(struct cmd_context *cmd, struct profile *profile);
+const char *get_default_unconfigured_backup_backup_dir_CFG(struct cmd_context *cmd);
 const char *get_default_backup_archive_dir_CFG(struct cmd_context *cmd, struct profile *profile);
+const char *get_default_unconfigured_backup_archive_dir_CFG(struct cmd_context *cmd);
 const char *get_default_config_profile_dir_CFG(struct cmd_context *cmd, struct profile *profile);
+const char *get_default_unconfigured_config_profile_dir_CFG(struct cmd_context *cmd);
 const char *get_default_activation_mirror_image_fault_policy_CFG(struct cmd_context *cmd, struct profile *profile);
+#define get_default_unconfigured_activation_mirror_image_fault_policy_CFG NULL
 int get_default_allocation_thin_pool_chunk_size_CFG(struct cmd_context *cmd, struct profile *profile);
+#define get_default_unconfigured_allocation_thin_pool_chunk_size_CFG NULL
 int get_default_allocation_cache_pool_chunk_size_CFG(struct cmd_context *cmd, struct profile *profile);
+#define get_default_unconfigured_allocation_cache_pool_chunk_size_CFG NULL
+const char *get_default_allocation_cache_policy_CFG(struct cmd_context *cmd, struct profile *profile);
+#define get_default_unconfigured_allocation_cache_policy_CFG NULL

 #endif
--- a/lib/config/config_settings.h
+++ b/lib/config/config_settings.h
--- a/lib/config/defaults.h
+++ b/lib/config/defaults.h
@@ -51,11 +51,14 @@
 #define DEFAULT_FALLBACK_TO_LOCAL_LOCKING 1
 #define DEFAULT_FALLBACK_TO_CLUSTERED_LOCKING 1
 #define DEFAULT_WAIT_FOR_LOCKS 1
+#define DEFAULT_LVMLOCKD_LOCK_RETRIES 3
 #define DEFAULT_PRIORITISE_WRITE_LOCKS 1
 #define DEFAULT_USE_MLOCKALL 0
 #define DEFAULT_METADATA_READ_ONLY 0
 #define DEFAULT_LVDISPLAY_SHOWS_FULL_DEVICE_PATH 0

+#define DEFAULT_SANLOCK_LV_EXTEND_MB 256
+
 #define DEFAULT_MIRRORLOG MIRROR_LOG_DISK
 #define DEFAULT_MIRROR_LOG_FAULT_POLICY "allocate"
 #define DEFAULT_MIRROR_IMAGE_FAULT_POLICY "remove"
@@ -76,12 +79,17 @@
 #endif

 #ifdef THIN_CHECK_NEEDS_CHECK
-#  define DEFAULT_THIN_CHECK_OPTIONS "-q --clear-needs-check-flag"
+#  define DEFAULT_THIN_CHECK_OPTION1 "-q"
+#  define DEFAULT_THIN_CHECK_OPTION2 "--clear-needs-check-flag"
+#  define DEFAULT_THIN_CHECK_OPTIONS_CONFIG "#S" DEFAULT_THIN_CHECK_OPTION1 "#S" DEFAULT_THIN_CHECK_OPTION2
 #else
-#  define DEFAULT_THIN_CHECK_OPTIONS "-q"
+#  define DEFAULT_THIN_CHECK_OPTION1 "-q"
+#  define DEFAULT_THIN_CHECK_OPTION2 ""
+#  define DEFAULT_THIN_CHECK_OPTIONS_CONFIG "#S" DEFAULT_THIN_CHECK_OPTION1
 #endif

-#define DEFAULT_THIN_REPAIR_OPTIONS ""
+#define DEFAULT_THIN_REPAIR_OPTION1 ""
+#define DEFAULT_THIN_REPAIR_OPTIONS_CONFIG "#S" DEFAULT_THIN_REPAIR_OPTION1
 #define DEFAULT_THIN_POOL_METADATA_REQUIRE_SEPARATE_PVS 0
 #define DEFAULT_THIN_POOL_MAX_METADATA_SIZE (16 * 1024 * 1024)  /* KB */
 #define DEFAULT_THIN_POOL_MIN_METADATA_SIZE 2048  /* KB */
@@ -93,23 +101,27 @@
 #define DEFAULT_THIN_POOL_ZERO 1
 #define DEFAULT_POOL_METADATA_SPARE 1 /* thin + cache */

-#define DEFAULT_CACHE_CHECK_OPTIONS "-q"
-#define DEFAULT_CACHE_REPAIR_OPTIONS ""
+#ifdef CACHE_CHECK_NEEDS_CHECK
+#  define DEFAULT_CACHE_CHECK_OPTION1 "-q"
+#  define DEFAULT_CACHE_CHECK_OPTION2 "--clear-needs-check-flag"
+#  define DEFAULT_CACHE_CHECK_OPTIONS_CONFIG "#S" DEFAULT_CACHE_CHECK_OPTION1 "#S" DEFAULT_CACHE_CHECK_OPTION2
+#else
+#  define DEFAULT_CACHE_CHECK_OPTION1 "-q"
+#  define DEFAULT_CACHE_CHECK_OPTION2 ""
+#  define DEFAULT_CACHE_CHECK_OPTIONS_CONFIG "#S" DEFAULT_CACHE_CHECK_OPTION1
+#endif
+
+#define DEFAULT_CACHE_REPAIR_OPTION1 ""
+#define DEFAULT_CACHE_REPAIR_OPTIONS_CONFIG "#S" DEFAULT_CACHE_REPAIR_OPTION1
 #define DEFAULT_CACHE_POOL_METADATA_REQUIRE_SEPARATE_PVS 0
 #define DEFAULT_CACHE_POOL_CHUNK_SIZE 64 /* KB */
 #define DEFAULT_CACHE_POOL_MIN_METADATA_SIZE 2048  /* KB */
 #define DEFAULT_CACHE_POOL_MAX_METADATA_SIZE (16 * 1024 * 1024)  /* KB */
-#define DEFAULT_CACHE_POOL_CACHEMODE "writethrough"
-#define DEFAULT_CACHE_POOL_POLICY "mq"
+#define DEFAULT_CACHE_POLICY "mq"
+#define DEFAULT_CACHE_MODE "writethrough"

 #define DEFAULT_UMASK 0077

-#ifdef LVM1_FALLBACK
-#  define DEFAULT_FALLBACK_TO_LVM1 1
-#else
-#  define DEFAULT_FALLBACK_TO_LVM1 0
-#endif
-
 #define DEFAULT_FORMAT "lvm2"

 #define DEFAULT_STRIPESIZE 64	/* KB */
@@ -140,10 +152,6 @@
 #  define DEFAULT_LOG_FACILITY LOG_USER
 #endif

-#define DEFAULT_LOGGED_DEBUG_CLASSES (LOG_CLASS_MEM | LOG_CLASS_DEVS | \
-    LOG_CLASS_ACTIVATION | LOG_CLASS_ALLOC | LOG_CLASS_LVMETAD | \
-    LOG_CLASS_METADATA | LOG_CLASS_CACHE | LOG_CLASS_LOCKING)
-
 #define DEFAULT_SYSLOG 1
 #define DEFAULT_VERBOSE 0
 #define DEFAULT_SILENT 0
@@ -189,6 +197,7 @@
 #define DEFAULT_REP_QUOTED 1
 #define DEFAULT_REP_SEPARATOR " "
 #define DEFAULT_REP_LIST_ITEM_SEPARATOR ","
+#define DEFAULT_TIME_FORMAT "%Y-%m-%d %T %z"

 #define DEFAULT_LVS_COLS "lv_name,vg_name,lv_attr,lv_size,pool_lv,origin,data_percent,metadata_percent,move_pv,mirror_log,copy_percent,convert_lv"
 #define DEFAULT_VGS_COLS "vg_name,pv_count,lv_count,snap_count,vg_attr,vg_size,vg_free"
@@ -218,4 +227,6 @@
 #define DEFAULT_THIN_POOL_AUTOEXTEND_THRESHOLD 100
 #define DEFAULT_THIN_POOL_AUTOEXTEND_PERCENT 20

+#define DEFAULT_CY_LOCK_TYPE "sanlock"
+
 #endif				/* _LVM_DEFAULTS_H */
--- a/lib/datastruct/str_list.c
+++ b/lib/datastruct/str_list.c
@@ -71,6 +71,21 @@ int str_list_add(struct dm_pool *mem, struct dm_list *sll, const char *str)
 	return str_list_add_no_dup_check(mem, sll, str);
 }

+/* Add each string of sll2 to sll via str_list_add(); returns 1 on success, 0 if sll2 is NULL or any add fails. */
+int str_list_add_list(struct dm_pool *mem, struct dm_list *sll, struct dm_list *sll2)
+{
+	struct dm_str_list *sl;
+
+	if (!sll2)
+		return_0;
+
+	dm_list_iterate_items(sl, sll2)
+		if (!str_list_add(mem, sll, sl->str))
+			return_0;
+
+	return 1;
+}
+
 void str_list_del(struct dm_list *sll, const char *str)
 {
 	struct dm_list *slh, *slht;
--- a/lib/datastruct/str_list.h
+++ b/lib/datastruct/str_list.h
@@ -21,6 +21,7 @@ struct dm_pool;

 struct dm_list *str_list_create(struct dm_pool *mem);
 int str_list_add(struct dm_pool *mem, struct dm_list *sll, const char *str);
+int str_list_add_list(struct dm_pool *mem, struct dm_list *sll, struct dm_list *sll2);
 int str_list_add_no_dup_check(struct dm_pool *mem, struct dm_list *sll, const char *str);
 int str_list_add_h_no_dup_check(struct dm_pool *mem, struct dm_list *sll, const char *str);
 void str_list_del(struct dm_list *sll, const char *str);
--- a/lib/device/dev-cache.c
+++ b/lib/device/dev-cache.c
@@ -681,10 +681,12 @@ static int _init_preferred_names(struct cmd_context *cmd)

 	_cache.preferred_names_matcher = NULL;

-	if (!(cn = find_config_tree_node(cmd, devices_preferred_names_CFG, NULL)) ||
+	if (!(cn = find_config_tree_array(cmd, devices_preferred_names_CFG, NULL)) ||
 	    cn->v->type == DM_CFG_EMPTY_ARRAY) {
-		log_very_verbose("devices/preferred_names not found in config file: "
-				 "using built-in preferences");
+		log_very_verbose("devices/preferred_names %s: "
+				 "using built-in preferences",
+				 cn && cn->v->type == DM_CFG_EMPTY_ARRAY ? "is empty"
+									 : "not found in config");
 		return 1;
 	}

@@ -943,7 +945,7 @@ struct device *dev_cache_get(const char *name, struct dev_filter *f)
 		if (d)
 			dm_hash_remove(_cache.names, name);
 		log_sys_very_verbose("stat", name);
-		return NULL;
+		d = NULL;
 	}

 	if (d && (buf.st_rdev != d->dev)) {
--- a/lib/device/dev-type.c
+++ b/lib/device/dev-type.c
@@ -225,6 +225,9 @@ int dev_subsystem_part_major(struct dev_types *dt, struct device *dev)

 const char *dev_subsystem_name(struct dev_types *dt, struct device *dev)
 {
+	if (MAJOR(dev->dev) == dt->device_mapper_major)
+		return "DM";
+
 	if (MAJOR(dev->dev) == dt->md_major)
 		return "MD";

@@ -430,7 +433,7 @@ int dev_get_primary_dev(struct dev_types *dt, struct device *dev, dev_t *result)
 	 */
 	if ((parts = dt->dev_type_array[major].max_partitions) > 1) {
 		if ((residue = minor % parts)) {
-			*result = MKDEV((dev_t)major, (minor - residue));
+			*result = MKDEV((dev_t)major, (dev_t)(minor - residue));
 			ret = 2;
 		} else {
 			*result = dev->dev;
@@ -507,7 +510,7 @@ int dev_get_primary_dev(struct dev_types *dt, struct device *dev, dev_t *result)
 			  path, buffer);
 		goto out;
 	}
-	*result = MKDEV((dev_t)major, minor);
+	*result = MKDEV((dev_t)major, (dev_t)minor);
 	ret = 2;
 out:
 	if (fp && fclose(fp))
@@ -525,12 +528,14 @@ static inline int _type_in_flag_list(const char *type, uint32_t flag_list)
 		((flag_list & TYPE_DM_SNAPSHOT_COW) && !strcmp(type, "DM_snapshot_cow")));
 }

+#define MSG_FAILED_SIG_OFFSET "Failed to get offset of the %s signature on %s."
+#define MSG_FAILED_SIG_LENGTH "Failed to get length of the %s signature on %s."
+#define MSG_WIPING_SKIPPED " Wiping skipped."
+
 static int _blkid_wipe(blkid_probe probe, struct device *dev, const char *name,
 		       uint32_t types_to_exclude, uint32_t types_no_prompt,
 		       int yes, force_t force)
 {
-	static const char _msg_failed_offset[] = "Failed to get offset of the %s signature on %s.";
-	static const char _msg_failed_length[] = "Failed to get length of the %s signature on %s.";
 	static const char _msg_wiping[] = "Wiping %s signature on %s.";
 	const char *offset = NULL, *type = NULL, *magic = NULL,
 		   *usage = NULL, *label = NULL, *uuid = NULL;
@@ -541,21 +546,41 @@ static int _blkid_wipe(blkid_probe probe, struct device *dev, const char *name,
 		if (_type_in_flag_list(type, types_to_exclude))
 			return 2;
 		if (blkid_probe_lookup_value(probe, "SBMAGIC_OFFSET", &offset, NULL)) {
-			log_error(_msg_failed_offset, type, name);
-			return 0;
+			if (force < DONT_PROMPT) {
+				log_error(MSG_FAILED_SIG_OFFSET, type, name);
+				return 0;
+			} else { /* forced run: only warn and skip wiping, matching the sibling branches */
+				log_warn("WARNING: " MSG_FAILED_SIG_OFFSET MSG_WIPING_SKIPPED, type, name);
+				return 2;
+			}
 		}
 		if (blkid_probe_lookup_value(probe, "SBMAGIC", &magic, &len)) {
-			log_error(_msg_failed_length, type, name);
-			return 0;
+			if (force < DONT_PROMPT) {
+				log_error(MSG_FAILED_SIG_LENGTH, type, name);
+				return 0;
+			} else {
+				log_warn("WARNING: " MSG_FAILED_SIG_LENGTH MSG_WIPING_SKIPPED, type, name);
+				return 2;
+			}
 		}
 	} else if (!blkid_probe_lookup_value(probe, "PTTYPE", &type, NULL)) {
 		if (blkid_probe_lookup_value(probe, "PTMAGIC_OFFSET", &offset, NULL)) {
-			log_error(_msg_failed_offset, type, name);
-			return 0;
+			if (force < DONT_PROMPT) {
+				log_error(MSG_FAILED_SIG_OFFSET, type, name);
+				return 0;
+			} else {
+				log_warn("WARNING: " MSG_FAILED_SIG_OFFSET MSG_WIPING_SKIPPED, type, name);
+				return 2;
+			}
 		}
 		if (blkid_probe_lookup_value(probe, "PTMAGIC", &magic, &len)) {
-			log_error(_msg_failed_length, type, name);
-			return 0;
+			if (force < DONT_PROMPT) {
+				log_error(MSG_FAILED_SIG_LENGTH, type, name);
+				return 0;
+			} else {
+				log_warn("WARNING: " MSG_FAILED_SIG_LENGTH MSG_WIPING_SKIPPED, type, name);
+				return 2;
+			}
 		}
 		usage = "partition table";
 	} else
@@ -799,7 +824,7 @@ static unsigned long _dev_topology_attribute(struct dev_types *dt,
 	}

 	log_very_verbose("Device %s: %s is %lu%s.",
-			 dev_name(dev), attribute, result, default_value ? "" : " bytes");
+			 dev_name(dev), attribute, value, default_value ? "" : " bytes");

 	result = value >> SECTOR_SHIFT;

--- a/lib/display/display.c
+++ b/lib/display/display.c
@@ -24,10 +24,6 @@

 #include <stdarg.h>

-#define SIZE_BUF 128
-
-typedef enum { SIZE_LONG = 0, SIZE_SHORT = 1, SIZE_UNIT = 2 } size_len_t;
-
 static const struct {
 	alloc_policy_t alloc;
 	const char str[14]; /* must be changed when size extends 13 chars */
@@ -86,6 +82,38 @@ alloc_policy_t get_alloc_from_string(const char *str)
 	return ALLOC_INVALID;
 }

+const char *get_lock_type_string(lock_type_t lock_type)
+{
+	switch (lock_type) {
+	case LOCK_TYPE_INVALID:
+		return "invalid";
+	case LOCK_TYPE_NONE:
+		return "none";
+	case LOCK_TYPE_CLVM:
+		return "clvm";
+	case LOCK_TYPE_DLM:
+		return "dlm";
+	case LOCK_TYPE_SANLOCK:
+		return "sanlock";
+	}
+	return "invalid";
+}
+
+lock_type_t get_lock_type_from_string(const char *str)
+{
+	if (!str)
+		return LOCK_TYPE_NONE;
+	if (!strcmp(str, "none"))
+		return LOCK_TYPE_NONE;
+	if (!strcmp(str, "clvm"))
+		return LOCK_TYPE_CLVM;
+	if (!strcmp(str, "dlm"))
+		return LOCK_TYPE_DLM;
+	if (!strcmp(str, "sanlock"))
+		return LOCK_TYPE_SANLOCK;
+	return LOCK_TYPE_INVALID;
+}
+
 static const char *_percent_types[7] = { "NONE", "VG", "FREE", "LV", "PVS", "ORIGIN" };

 const char *get_percent_string(percent_type_t def)
@@ -95,168 +123,49 @@ const char *get_percent_string(percent_type_t def)

 const char *display_lvname(const struct logical_volume *lv)
 {
-	/* On allocation failure, just return the LV name. */
-	return lv_fullname_dup(lv->vg->cmd->mem, lv) ? : lv->name;
-}
+	char *name;
+	int r;

-#define BASE_UNKNOWN 0
-#define BASE_SHARED 1
-#define BASE_1024 8
-#define BASE_1000 15
-#define BASE_SPECIAL 21
-#define NUM_UNIT_PREFIXES 6
-#define NUM_SPECIAL 3
+	if ((lv->vg->cmd->display_lvname_idx + NAME_LEN) >= sizeof((lv->vg->cmd->display_buffer)))
+		lv->vg->cmd->display_lvname_idx = 0;
+
+	name = lv->vg->cmd->display_buffer + lv->vg->cmd->display_lvname_idx;
+	r = dm_snprintf(name, NAME_LEN, "%s/%s", lv->vg->name, lv->name);
+
+	if (r < 0) {
+		log_error("Full LV name \"%s/%s\" is too long.", lv->vg->name, lv->name);
+		return NULL;
+	}
+
+	lv->vg->cmd->display_lvname_idx += r + 1;
+
+	return name;
+}

 /* Size supplied in sectors */
 static const char *_display_size(const struct cmd_context *cmd,
-				 uint64_t size, size_len_t sl)
+				 uint64_t size, dm_size_suffix_t suffix_type)
 {
-	unsigned base = BASE_UNKNOWN;
-	unsigned s;
-	int suffix, precision;
-	uint64_t byte = UINT64_C(0);
-	uint64_t units = UINT64_C(1024);
-	char *size_buf = NULL;
-	const char * const size_str[][3] = {
-		/* BASE_UNKNOWN */
-		{"         ", "   ", " "},	/* [0] */
-
-		/* BASE_SHARED - Used if cmd->si_unit_consistency = 0 */
-		{" Exabyte", " EB", "E"},	/* [1] */
-		{" Petabyte", " PB", "P"},	/* [2] */
-		{" Terabyte", " TB", "T"},	/* [3] */
-		{" Gigabyte", " GB", "G"},	/* [4] */
-		{" Megabyte", " MB", "M"},	/* [5] */
-		{" Kilobyte", " KB", "K"},	/* [6] */
-		{" Byte    ", " B", "B"},	/* [7] */
-
-		/* BASE_1024 - Used if cmd->si_unit_consistency = 1 */
-		{" Exbibyte", " EiB", "e"},	/* [8] */
-		{" Pebibyte", " PiB", "p"},	/* [9] */
-		{" Tebibyte", " TiB", "t"},	/* [10] */
-		{" Gibibyte", " GiB", "g"},	/* [11] */
-		{" Mebibyte", " MiB", "m"},	/* [12] */
-		{" Kibibyte", " KiB", "k"},	/* [13] */
-		{" Byte    ", " B", "b"},	/* [14] */
-
-		/* BASE_1000 - Used if cmd->si_unit_consistency = 1 */
-		{" Exabyte",  " EB", "E"},	/* [15] */
-		{" Petabyte", " PB", "P"},	/* [16] */
-		{" Terabyte", " TB", "T"},	/* [17] */
-		{" Gigabyte", " GB", "G"},	/* [18] */
-		{" Megabyte", " MB", "M"},	/* [19] */
-		{" Kilobyte", " kB", "K"},	/* [20] */
-
-		/* BASE_SPECIAL */
-		{" Byte    ", " B ", "B"},	/* [21] (shared with BASE_1000) */
-		{" Units   ", " Un", "U"},	/* [22] */
-		{" Sectors ", " Se", "S"},	/* [23] */
-	};
-
-	if (!(size_buf = dm_pool_alloc(cmd->mem, SIZE_BUF))) {
-		log_error("no memory for size display buffer");
-		return "";
-	}
-
-	suffix = cmd->current_settings.suffix;
-
-	if (!cmd->si_unit_consistency) {
-		/* Case-independent match */
-		for (s = 0; s < NUM_UNIT_PREFIXES; s++)
-			if (toupper((int) cmd->current_settings.unit_type) ==
-			    *size_str[BASE_SHARED + s][2]) {
-				base = BASE_SHARED;
-				break;
-			}
-	} else {
-		/* Case-dependent match for powers of 1000 */
-		for (s = 0; s < NUM_UNIT_PREFIXES; s++)
-			if (cmd->current_settings.unit_type ==
-			    *size_str[BASE_1000 + s][2]) {
-				base = BASE_1000;
-				break;
-			}
-
-		/* Case-dependent match for powers of 1024 */
-		if (base == BASE_UNKNOWN)
-			for (s = 0; s < NUM_UNIT_PREFIXES; s++)
-			if (cmd->current_settings.unit_type ==
-			    *size_str[BASE_1024 + s][2]) {
-				base = BASE_1024;
-				break;
-			}
-	}
-
-	if (base == BASE_UNKNOWN)
-		/* Check for special units - s, b or u */
-		for (s = 0; s < NUM_SPECIAL; s++)
-			if (toupper((int) cmd->current_settings.unit_type) ==
-			    *size_str[BASE_SPECIAL + s][2]) {
-				base = BASE_SPECIAL;
-				break;
-			}
-
-	if (size == UINT64_C(0)) {
-		if (base == BASE_UNKNOWN)
-			s = 0;
-		sprintf(size_buf, "0%s", suffix ? size_str[base + s][sl] : "");
-		return size_buf;
-	}
-
-	size *= UINT64_C(512);
-
-	if (base != BASE_UNKNOWN)
-		byte = cmd->current_settings.unit_factor;
-	else {
-		/* Human-readable style */
-		if (cmd->current_settings.unit_type == 'H') {
-			units = UINT64_C(1000);
-			base = BASE_1000;
-		} else {
-			units = UINT64_C(1024);
-			base = BASE_1024;
-		}
-
-		if (!cmd->si_unit_consistency)
-			base = BASE_SHARED;
-
-		byte = units * units * units * units * units * units;
-
-		for (s = 0; s < NUM_UNIT_PREFIXES && size < byte; s++)
-			byte /= units;
-
-		suffix = 1;
-	}
-
-	/* FIXME Make precision configurable */
-	switch (toupper(*size_str[base + s][SIZE_UNIT])) {
-	case 'B':
-	case 'S':
-		precision = 0;
-		break;
-	default:
-		precision = 2;
-	}
-
-	snprintf(size_buf, SIZE_BUF - 1, "%.*f%s", precision,
-		 (double) size / byte, suffix ? size_str[base + s][sl] : "");
-
-	return size_buf;
+	return dm_size_to_string(cmd->mem, size, cmd->current_settings.unit_type,
+				 cmd->si_unit_consistency, 
+				 cmd->current_settings.unit_factor,
+				 cmd->current_settings.suffix,
+				 suffix_type);
 }

 const char *display_size_long(const struct cmd_context *cmd, uint64_t size)
 {
-	return _display_size(cmd, size, SIZE_LONG);
+	return _display_size(cmd, size, DM_SIZE_LONG);
 }

 const char *display_size_units(const struct cmd_context *cmd, uint64_t size)
 {
-	return _display_size(cmd, size, SIZE_UNIT);
+	return _display_size(cmd, size, DM_SIZE_UNIT);
 }

 const char *display_size(const struct cmd_context *cmd, uint64_t size)
 {
-	return _display_size(cmd, size, SIZE_SHORT);
+	return _display_size(cmd, size, DM_SIZE_SHORT);
 }

 void pvdisplay_colons(const struct physical_volume *pv)
@@ -474,7 +383,7 @@ int lvdisplay_full(struct cmd_context *cmd,
 	log_print("LV UUID                %s", uuid);
 	log_print("LV Write Access        %s", access_str);
 	log_print("LV Creation host, time %s, %s",
-		  lv_host_dup(cmd->mem, lv), lv_time_dup(cmd->mem, lv));
+		  lv_host_dup(cmd->mem, lv), lv_time_dup(cmd->mem, lv, 1));

 	if (lv_is_origin(lv)) {
 		log_print("LV snapshot status     source of");
--- a/lib/display/display.h
+++ b/lib/display/display.h
@@ -64,6 +64,9 @@ const char *get_alloc_string(alloc_policy_t alloc);
 char alloc_policy_char(alloc_policy_t alloc);
 alloc_policy_t get_alloc_from_string(const char *str);

+const char *get_lock_type_string(lock_type_t lock_type);
+lock_type_t get_lock_type_from_string(const char *str);
+
 const char *get_percent_string(percent_type_t def);

 char yes_no_prompt(const char *prompt, ...) __attribute__ ((format(printf, 1, 2)));
--- a/lib/format1/import-export.c
+++ b/lib/format1/import-export.c
@@ -164,7 +164,7 @@ int export_pv(struct cmd_context *cmd, struct dm_pool *mem __attribute__((unused
 	/* Is VG already exported or being exported? */
 	if (vg && vg_is_exported(vg)) {
 		/* Does system_id need setting? */
-		if (!*vg->lvm1_system_id ||
+		if (!vg->lvm1_system_id || !*vg->lvm1_system_id ||
 		    strncmp(vg->lvm1_system_id, EXPORTED_TAG,
 			    sizeof(EXPORTED_TAG) - 1)) {
 			if (!generate_lvm1_system_id(cmd, (char *)pvd->system_id, EXPORTED_TAG))
@@ -180,7 +180,7 @@ int export_pv(struct cmd_context *cmd, struct dm_pool *mem __attribute__((unused
 	}

 	/* Is VG being imported? */
-	if (vg && !vg_is_exported(vg) && *vg->lvm1_system_id &&
+	if (vg && !vg_is_exported(vg) && vg->lvm1_system_id && *vg->lvm1_system_id &&
 	    !strncmp(vg->lvm1_system_id, EXPORTED_TAG, sizeof(EXPORTED_TAG) - 1)) {
 		if (!generate_lvm1_system_id(cmd, (char *)pvd->system_id, IMPORTED_TAG))
 			return_0;
@@ -192,7 +192,7 @@ int export_pv(struct cmd_context *cmd, struct dm_pool *mem __attribute__((unused
 			return_0;

 	/* Update internal system_id if we changed it */
-	if (vg &&
+	if (vg && vg->lvm1_system_id &&
 	    (!*vg->lvm1_system_id ||
 	     strncmp(vg->lvm1_system_id, (char *)pvd->system_id, sizeof(pvd->system_id))))
 		    strncpy(vg->lvm1_system_id, (char *)pvd->system_id, NAME_LEN);
--- a/lib/format_text/export.c
+++ b/lib/format_text/export.c
@@ -372,19 +372,61 @@ static int _print_flag_config(struct formatter *f, uint64_t status, int type)
 	return 1;
 }

-
-static int _out_tags(struct formatter *f, struct dm_list *tagsl)
+static char *_alloc_printed_str_list(struct dm_list *list)
 {
-	char *tag_buffer;
+	struct dm_str_list *sl;
+	int first = 1;
+	size_t size = 0;
+	char *buffer, *buf;

-	if (!dm_list_empty(tagsl)) {
-		if (!(tag_buffer = alloc_printed_tags(tagsl)))
+	dm_list_iterate_items(sl, list)
+		/* '"' + item + '"' + ',' + ' ' */
+		size += strlen(sl->str) + 4;
+	/* '[' + ']' + '\0' */
+	size += 3;
+
+	if (!(buffer = buf = dm_malloc(size))) {
+		log_error("Could not allocate memory for string list buffer.");
+		return NULL;
+	}
+
+	if (!emit_to_buffer(&buf, &size, "["))
+		goto_bad;
+
+	dm_list_iterate_items(sl, list) {
+		if (!first) {
+			if (!emit_to_buffer(&buf, &size, ", "))
+				goto_bad;
+		} else
+			first = 0;
+
+		if (!emit_to_buffer(&buf, &size, "\"%s\"", sl->str))
+			goto_bad;
+	}
+
+	if (!emit_to_buffer(&buf, &size, "]"))
+		goto_bad;
+
+	return buffer;
+
+bad:
+	dm_free(buffer);
+	return_NULL;
+}
+
+static int _out_list(struct formatter *f, struct dm_list *list,
+		     const char *list_name)
+{
+	char *buffer;
+
+	if (!dm_list_empty(list)) {
+		if (!(buffer = _alloc_printed_str_list(list)))
 			return_0;
-		if (!out_text(f, "tags = %s", tag_buffer)) {
-			dm_free(tag_buffer);
+		if (!out_text(f, "%s = %s", list_name, buffer)) {
+			dm_free(buffer);
 			return_0;
 		}
-		dm_free(tag_buffer);
+		dm_free(buffer);
 	}

 	return 1;
@@ -422,7 +464,7 @@ static int _print_vg(struct formatter *f, struct volume_group *vg)
 	if (!_print_flag_config(f, status, VG_FLAGS))
 		return_0;

-	if (!_out_tags(f, &vg->tags))
+	if (!_out_list(f, &vg->tags, "tags"))
 		return_0;
 
 	if (vg->system_id && *vg->system_id)
@@ -430,8 +472,11 @@ static int _print_vg(struct formatter *f, struct volume_group *vg)
 	else if (vg->lvm1_system_id && *vg->lvm1_system_id)
 		outf(f, "system_id = \"%s\"", vg->lvm1_system_id);

-	if (vg->lock_type)
+	if (vg->lock_type) {
 		outf(f, "lock_type = \"%s\"", vg->lock_type);
+		if (vg->lock_args)
+			outf(f, "lock_args = \"%s\"", vg->lock_args);
+	}

 	outsize(f, (uint64_t) vg->extent_size, "extent_size = %u",
 		vg->extent_size);
@@ -509,7 +554,7 @@ static int _print_pvs(struct formatter *f, struct volume_group *vg)
 		if (!_print_flag_config(f, pv->status, PV_FLAGS))
 			return_0;

-		if (!_out_tags(f, &pv->tags))
+		if (!_out_list(f, &pv->tags, "tags"))
 			return_0;

 		outsize(f, pv->size, "dev_size = %" PRIu64, pv->size);
@@ -545,7 +590,7 @@ static int _print_segment(struct formatter *f, struct volume_group *vg,
 	outnl(f);
 	outf(f, "type = \"%s\"", seg->segtype->name);

-	if (!_out_tags(f, &seg->tags))
+	if (!_out_list(f, &seg->tags, "tags"))
 		return_0;

 	if (seg->segtype->ops->text_export &&
@@ -641,7 +686,7 @@ static int _print_lv(struct formatter *f, struct logical_volume *lv)
 	if (!_print_flag_config(f, status, LV_FLAGS))
 		return_0;

-	if (!_out_tags(f, &lv->tags))
+	if (!_out_list(f, &lv->tags, "tags"))
 		return_0;

 	if (lv->timestamp) {
@@ -657,6 +702,9 @@ static int _print_lv(struct formatter *f, struct logical_volume *lv)
 		      lv->timestamp);
 	}

+	if (lv->lock_args)
+		outf(f, "lock_args = \"%s\"", lv->lock_args);
+
 	if (lv->alloc != ALLOC_INHERIT)
 		outf(f, "allocation_policy = \"%s\"",
 		     get_alloc_string(lv->alloc));
--- a/lib/format_text/flags.c
+++ b/lib/format_text/flags.c
@@ -67,6 +67,7 @@ static const struct flag _lv_flags[] = {
 	{LV_NOSCAN, NULL, 0},
 	{LV_TEMPORARY, NULL, 0},
 	{POOL_METADATA_SPARE, NULL, 0},
+	{LOCKD_SANLOCK_LV, NULL, 0},
 	{RAID, NULL, 0},
 	{RAID_META, NULL, 0},
 	{RAID_IMAGE, NULL, 0},
@@ -92,6 +93,7 @@ static const struct flag _lv_flags[] = {
 	{CACHE_POOL_DATA, NULL, 0},
 	{CACHE_POOL_METADATA, NULL, 0},
 	{LV_PENDING_DELETE, NULL, 0}, /* FIXME Display like COMPATIBLE_FLAG */
+	{LV_REMOVED, NULL, 0},
 	{0, NULL, 0}
 };

--- a/lib/format_text/format-text.c
+++ b/lib/format_text/format-text.c
@@ -1318,7 +1318,7 @@ static int _write_single_mda(struct metadata_area *mda, void *baton)
 	return 1;
 }

-/* Only for orphans */
+/* Only for orphans - FIXME That's not true any more */
 static int _text_pv_write(const struct format_type *fmt, struct physical_volume *pv)
 {
 	struct format_instance *fid = pv->fid;
@@ -1332,7 +1332,8 @@ static int _text_pv_write(const struct format_type *fmt, struct physical_volume

 	/* Add a new cache entry with PV info or update existing one. */
 	if (!(info = lvmcache_add(fmt->labeller, (const char *) &pv->id,
-				  pv->dev, pv->vg_name, NULL, 0)))
+				  pv->dev, pv->vg_name,
+				  is_orphan_vg(pv->vg_name) ? pv->vg_name : pv->vg ? (const char *) &pv->vg->id : NULL, 0)))
 		return_0;

 	label = lvmcache_get_label(info);
@@ -1602,9 +1603,9 @@ static int _text_pv_initialise(const struct format_type *fmt,
 	if (rp->extent_count)
 		pv->pe_count = rp->extent_count;

-	if ((pv->pe_start + pv->pe_count * pv->pe_size - 1) > (pv->size << SECTOR_SHIFT)) {
+	if ((pv->pe_start + pv->pe_count * (uint64_t)pv->pe_size - 1) > pv->size) {
 		log_error("Physical extents end beyond end of device %s.",
-			   pv_dev_name(pv));
+			  pv_dev_name(pv));
 		return 0;
 	}

@@ -2171,7 +2172,7 @@ static int _text_pv_add_metadata_area(const struct format_type *fmt,
 		 * LABEL_SCAN_SIZE.
 		 */
 		pe_end = pv->pe_count ? (pv->pe_start +
-					 pv->pe_count * pv->pe_size - 1) << SECTOR_SHIFT
+					 pv->pe_count * (uint64_t)pv->pe_size - 1) << SECTOR_SHIFT
 				      : 0;

 		if (pe_start || pe_start_locked) {
@@ -2236,7 +2237,7 @@ static int _text_pv_add_metadata_area(const struct format_type *fmt,
 	if (limit_applied)
 		log_very_verbose("Using limited metadata area size on %s "
 				 "with value %" PRIu64 " (limited by %s of "
-				 "%" PRIu64 ").", pv_dev_name(pv),
+				 FMTu64 ").", pv_dev_name(pv),
 				  mda_size, limit_name, limit);

 	if (mda_size) {
@@ -2490,7 +2491,7 @@ struct format_type *create_text_format(struct cmd_context *cmd)
 		goto bad;
 	}

-	if ((cn = find_config_tree_node(cmd, metadata_dirs_CFG, NULL))) {
+	if ((cn = find_config_tree_array(cmd, metadata_dirs_CFG, NULL))) {
 		for (cv = cn->v; cv; cv = cv->next) {
 			if (cv->type != DM_CFG_STRING) {
 				log_error("Invalid string in config file: "
--- a/lib/format_text/import-export.h
+++ b/lib/format_text/import-export.h
@@ -47,7 +47,8 @@ struct text_vg_version_ops {
 	int (*check_version) (const struct dm_config_tree * cf);
 	struct volume_group *(*read_vg) (struct format_instance * fid,
 					 const struct dm_config_tree *cf,
-					 unsigned use_cached_pvs);
+					 unsigned use_cached_pvs,
+					 unsigned allow_lvmetad_extensions);
 	void (*read_desc) (struct dm_pool * mem, const struct dm_config_tree *cf,
 			   time_t *when, char **desc);
 	int (*read_vgname) (const struct format_type *fmt,
@@ -60,9 +61,6 @@ struct text_vg_version_ops *text_vg_vsn1_init(void);
 int print_flags(uint64_t status, int type, char *buffer, size_t size);
 int read_flags(uint64_t *status, int type, const struct dm_config_value *cv);

-char *alloc_printed_tags(struct dm_list *tags);
-int read_tags(struct dm_pool *mem, struct dm_list *tags, const struct dm_config_value *cv);
-
 int text_vg_export_file(struct volume_group *vg, const char *desc, FILE *fp);
 size_t text_vg_export_raw(struct volume_group *vg, const char *desc, char **buf);
 struct volume_group *text_vg_import_file(struct format_instance *fid,
--- a/lib/format_text/import.c
+++ b/lib/format_text/import.c
@@ -146,7 +146,7 @@ struct volume_group *text_vg_import_fd(struct format_instance *fid,
 		if (!(*vsn)->check_version(cft))
 			continue;

-		if (!(vg = (*vsn)->read_vg(fid, cft, single_device)))
+		if (!(vg = (*vsn)->read_vg(fid, cft, single_device, 0)))
 			goto_out;

 		(*vsn)->read_desc(vg->vgmem, cft, when, desc);
@@ -174,8 +174,9 @@ struct volume_group *text_vg_import_file(struct format_instance *fid,
 				 when, desc);
 }

-struct volume_group *import_vg_from_config_tree(const struct dm_config_tree *cft,
-						struct format_instance *fid)
+static struct volume_group *_import_vg_from_config_tree(const struct dm_config_tree *cft,
+							struct format_instance *fid,
+							unsigned allow_lvmetad_extensions)
 {
 	struct volume_group *vg = NULL;
 	struct text_vg_version_ops **vsn;
@@ -190,7 +191,7 @@ struct volume_group *import_vg_from_config_tree(const struct dm_config_tree *cft
 		 * The only path to this point uses cached vgmetadata,
 		 * so it can use cached PV state too.
 		 */
-		if (!(vg = (*vsn)->read_vg(fid, cft, 1)))
+		if (!(vg = (*vsn)->read_vg(fid, cft, 1, allow_lvmetad_extensions)))
 			stack;
 		else if ((vg_missing = vg_missing_pv_count(vg))) {
 			log_verbose("There are %d physical volumes missing.",
@@ -203,3 +204,15 @@ struct volume_group *import_vg_from_config_tree(const struct dm_config_tree *cft

 	return vg;
 }
+
+struct volume_group *import_vg_from_config_tree(const struct dm_config_tree *cft,
+						struct format_instance *fid)
+{	/* Import a VG from cft with the lvmetad-only metadata extensions disabled. */
+	return _import_vg_from_config_tree(cft, fid, 0);	/* 0 = allow_lvmetad_extensions off */
+}
+
+struct volume_group *import_vg_from_lvmetad_config_tree(const struct dm_config_tree *cft,
+							struct format_instance *fid)
+{	/* Import a VG from cft with the lvmetad-only metadata extensions enabled. */
+	return _import_vg_from_config_tree(cft, fid, 1);	/* 1 = allow_lvmetad_extensions on */
+}
--- a/lib/format_text/import_vsn1.c
+++ b/lib/format_text/import_vsn1.c
@@ -20,11 +20,13 @@
 #include "toolcontext.h"
 #include "lvmcache.h"
 #include "lvmetad.h"
+#include "lvmlockd.h"
 #include "lv_alloc.h"
 #include "pv_alloc.h"
 #include "segtype.h"
 #include "text_import.h"
 #include "defaults.h"
+#include "str_list.h"

 typedef int (*section_fn) (struct format_instance * fid,
 			   struct volume_group * vg, const struct dm_config_node * pvn,
@@ -153,6 +155,26 @@ static int _read_flag_config(const struct dm_config_node *n, uint64_t *status, i
 	return 1;
 }

+static int _read_str_list(struct dm_pool *mem, struct dm_list *list, const struct dm_config_value *cv)
+{	/* Append a copy (allocated from mem) of every string in the value chain cv to list; returns 1 on success, 0 on failure. */
+	if (cv->type == DM_CFG_EMPTY_ARRAY)	/* cv is dereferenced unchecked, so callers must pass a non-NULL value */
+		return 1;	/* an empty array leaves list untouched and counts as success */
+
+	while (cv) {	/* walk the chain through cv->next until it ends */
+		if (cv->type != DM_CFG_STRING) {	/* only string items are accepted */
+			log_error("Found an item that is not a string");
+			return 0;	/* nothing here removes items already added to list */
+		}
+
+		if (!str_list_add(mem, list, dm_pool_strdup(mem, cv->v.str)))	/* NOTE(review): strdup result is not NULL-checked here; presumably str_list_add rejects NULL - confirm */
+			return_0;
+
+		cv = cv->next;
+	}
+
+	return 1;
+}
+
 static int _read_pv(struct format_instance *fid,
 		    struct volume_group *vg, const struct dm_config_node *pvn,
 		    const struct dm_config_node *vgn __attribute__((unused)),
@@ -167,6 +189,8 @@ static int _read_pv(struct format_instance *fid,
 	const struct dm_config_value *cv;
 	uint64_t size, ba_start;

+	int outdated = !strcmp(pvn->parent->key, "outdated_pvs");
+
 	if (!(pvl = dm_pool_zalloc(mem, sizeof(*pvl))) ||
 	    !(pvl->pv = dm_pool_zalloc(mem, sizeof(*pvl->pv))))
 		return_0;
@@ -212,7 +236,7 @@ static int _read_pv(struct format_instance *fid,

 	memcpy(&pv->vgid, &vg->id, sizeof(vg->id));

-	if (!_read_flag_config(pvn, &pv->status, PV_FLAGS)) {
+	if (!outdated && !_read_flag_config(pvn, &pv->status, PV_FLAGS)) {
 		log_error("Couldn't read status flags for physical volume.");
 		return 0;
 	}
@@ -234,13 +258,13 @@ static int _read_pv(struct format_instance *fid,
 		return 0;
 	}

-	if (!_read_uint64(pvn, "pe_start", &pv->pe_start)) {
+	if (!outdated && !_read_uint64(pvn, "pe_start", &pv->pe_start)) {
 		log_error("Couldn't read extent start value (pe_start) "
 			  "for physical volume.");
 		return 0;
 	}

-	if (!_read_int32(pvn, "pe_count", &pv->pe_count)) {
+	if (!outdated && !_read_int32(pvn, "pe_count", &pv->pe_count)) {
 		log_error("Couldn't find extent count (pe_count) for "
 			  "physical volume.");
 		return 0;
@@ -251,7 +275,7 @@ static int _read_pv(struct format_instance *fid,
 	_read_uint64(pvn, "ba_start", &ba_start);
 	_read_uint64(pvn, "ba_size", &size);
 	if (ba_start && size) {
-		log_debug("Found bootloader area specification for PV %s "
+		log_debug_metadata("Found bootloader area specification for PV %s "
 			  "in metadata: ba_start=%" PRIu64 ", ba_size=%" PRIu64 ".",
 			  pv_dev_name(pv), ba_start, size);
 		pv->ba_start = ba_start;
@@ -267,7 +291,7 @@ static int _read_pv(struct format_instance *fid,

 	/* Optional tags */
 	if (dm_config_get_list(pvn, "tags", &cv) &&
-	    !(read_tags(mem, &pv->tags, cv))) {
+	    !(_read_str_list(mem, &pv->tags, cv))) {
 		log_error("Couldn't read tags for physical volume %s in %s.",
 			  pv_dev_name(pv), vg->name);
 		return 0;
@@ -299,7 +323,10 @@ static int _read_pv(struct format_instance *fid,

 	vg->extent_count += pv->pe_count;
 	vg->free_count += pv->pe_count;
-	add_pvl_to_vgs(vg, pvl);
+	if (outdated)
+		dm_list_add(&vg->pvs_outdated, &pvl->list);
+	else
+		add_pvl_to_vgs(vg, pvl);

 	return 1;
 }
@@ -375,7 +402,7 @@ static int _read_segment(struct logical_volume *lv, const struct dm_config_node

 	/* Optional tags */
 	if (dm_config_get_list(sn_child, "tags", &cv) &&
-	    !(read_tags(mem, &seg->tags, cv))) {
+	    !(_read_str_list(mem, &seg->tags, cv))) {
 		log_error("Couldn't read tags for a segment of %s/%s.",
 			  lv->vg->name, lv->name);
 		return 0;
@@ -573,6 +600,30 @@ static int _read_lvnames(struct format_instance *fid __attribute__((unused)),
 		return 0;
 	}

+	/*
+	 * The LV lock_args string is generated in lvmlockd, and the content
+	 * depends on the lock_type.
+	 *
+	 * lock_type dlm does not use LV lock_args, so the LV lock_args field
+	 * is just set to "dlm".
+	 *
+	 * lock_type sanlock uses the LV lock_args field to save the
+	 * location on disk of that LV's sanlock lock.  The disk name is
+	 * specified in the VG lock_args.  The lock_args string begins
+	 * with a version number, e.g. 1.0.0, followed by a colon, followed
+	 * by a number.  The number is the offset on disk where sanlock is
+	 * told to find the LV's lock.
+	 * e.g. lock_args = 1.0.0:70254592
+	 * means that the lock is located at offset 70254592.
+	 *
+	 * The lvmlockd code for each specific lock manager also validates
+	 * the lock_args before using it to access the lock manager.
+	 */
+	if (dm_config_get_str(lvn, "lock_args", &str)) {
+		if (!(lv->lock_args = dm_pool_strdup(mem, str)))
+			return_0;
+	}
+
 	lv->alloc = ALLOC_INHERIT;
 	if (dm_config_get_str(lvn, "allocation_policy", &str)) {
 		lv->alloc = get_alloc_from_string(str);
@@ -611,7 +662,7 @@ static int _read_lvnames(struct format_instance *fid __attribute__((unused)),

 	/* Optional tags */
 	if (dm_config_get_list(lvn, "tags", &cv) &&
-	    !(read_tags(mem, &lv->tags, cv))) {
+	    !(_read_str_list(mem, &lv->tags, cv))) {
 		log_error("Couldn't read tags for logical volume %s/%s.",
 			  vg->name, lv->name);
 		return 0;
@@ -638,6 +689,12 @@ static int _read_lvnames(struct format_instance *fid __attribute__((unused)),
 		vg->pool_metadata_spare_lv = lv;
 	}

+	if (!lv_is_visible(lv) && !strcmp(lv->name, LOCKD_SANLOCK_LV_NAME)) {
+		log_debug_metadata("Logical volume %s is sanlock lv.", lv->name);
+		lv->status |= LOCKD_SANLOCK_LV;
+		vg->sanlock_lv = lv;
+	}
+
 	return 1;
 }

@@ -735,7 +792,8 @@ static int _read_sections(struct format_instance *fid,

 static struct volume_group *_read_vg(struct format_instance *fid,
 				     const struct dm_config_tree *cft,
-				     unsigned use_cached_pvs)
+				     unsigned use_cached_pvs,
+				     unsigned allow_lvmetad_extensions)
 {
 	const struct dm_config_node *vgn;
 	const struct dm_config_value *cv;
@@ -789,6 +847,32 @@ static struct volume_group *_read_vg(struct format_instance *fid,
 			goto bad;
 	}

+	/*
+	 * The VG lock_args string is generated in lvmlockd, and the content
+	 * depends on the lock_type.  lvmlockd begins the lock_args string
+	 * with a version number, e.g. 1.0.0, followed by a colon, followed
+	 * by a string that depends on the lock manager.  The string after
+	 * the colon is information needed to use the lock manager for the VG.
+	 *
+	 * For sanlock, the string is the name of the internal LV used to store
+	 * sanlock locks.  lvmlockd needs to know where the locks are located
+	 * so it can pass that location to sanlock which needs to access the locks.
+	 * e.g. lock_args = 1.0.0:lvmlock
+	 * means that the locks are located on the LV "lvmlock".
+	 *
+	 * For dlm, the string is the dlm cluster name.  lvmlockd needs to use
+	 * a dlm lockspace in this cluster to use the VG.
+	 * e.g. lock_args = 1.0.0:foo
+	 * means that the host needs to be a member of the cluster "foo".
+	 *
+	 * The lvmlockd code for each specific lock manager also validates
+	 * the lock_args before using it to access the lock manager.
+	 */
+	if (dm_config_get_str(vgn, "lock_args", &str)) {
+		if (!(vg->lock_args = dm_pool_strdup(vg->vgmem, str)))
+			goto bad;
+	}
+
 	if (!_read_id(&vg->id, vgn, "id")) {
 		log_error("Couldn't read uuid for volume group %s.", vg->name);
 		goto bad;
@@ -877,9 +961,15 @@ static struct volume_group *_read_vg(struct format_instance *fid,
 		goto bad;
 	}

+	if (allow_lvmetad_extensions)
+		_read_sections(fid, "outdated_pvs", _read_pv, vg,
+			       vgn, pv_hash, lv_hash, 1, &scan_done_once);
+	else if (dm_config_has_node(vgn, "outdated_pvs"))
+		log_error(INTERNAL_ERROR "Unexpected outdated_pvs section in metadata of VG %s.", vg->name);
+
 	/* Optional tags */
 	if (dm_config_get_list(vgn, "tags", &cv) &&
-	    !(read_tags(vg->vgmem, &vg->tags, cv))) {
+	    !(_read_str_list(vg->vgmem, &vg->tags, cv))) {
 		log_error("Couldn't read tags for volume group %s.", vg->name);
 		goto bad;
 	}
@@ -941,6 +1031,11 @@ static void _read_desc(struct dm_pool *mem,
 	*when = u;
 }

+/*
+ * It would be more accurate to call this _read_vgsummary().
+ * It is used to read vgsummary information about a VG
+ * before locking and reading the VG via vg_read().
+ */
 static int _read_vgname(const struct format_type *fmt, const struct dm_config_tree *cft, 
 			struct lvmcache_vgsummary *vgsummary)
 {
@@ -977,6 +1072,8 @@ static int _read_vgname(const struct format_type *fmt, const struct dm_config_tr
 		return 0;
 	}

+	dm_config_get_str(vgn, "lock_type", &vgsummary->lock_type);
+
 	return 1;
 }

--- a/lib/format_text/tags.c
+++ b/lib/format_text/tags.c
@@ -1,82 +0,0 @@
-/*
- * Copyright (C) 2003-2004 Sistina Software, Inc. All rights reserved.
- * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
- *
- * This file is part of LVM2.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU Lesser General Public License v.2.1.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#include "lib.h"
-#include "metadata.h"
-#include "import-export.h"
-#include "str_list.h"
-#include "lvm-string.h"
-
-char *alloc_printed_tags(struct dm_list *tagsl)
-{
-	struct dm_str_list *sl;
-	int first = 1;
-	size_t size = 0;
-	char *buffer, *buf;
-
-	dm_list_iterate_items(sl, tagsl)
-		/* '"' + tag + '"' + ',' + ' ' */
-		size += strlen(sl->str) + 4;
-	/* '[' + ']' + '\0' */
-	size += 3;
-
-	if (!(buffer = buf = dm_malloc(size))) {
-		log_error("Could not allocate memory for tag list buffer.");
-		return NULL;
-	}
-
-	if (!emit_to_buffer(&buf, &size, "["))
-		goto_bad;
-
-	dm_list_iterate_items(sl, tagsl) {
-		if (!first) {
-			if (!emit_to_buffer(&buf, &size, ", "))
-				goto_bad;
-		} else
-			first = 0;
-
-		if (!emit_to_buffer(&buf, &size, "\"%s\"", sl->str))
-			goto_bad;
-	}
-
-	if (!emit_to_buffer(&buf, &size, "]"))
-		goto_bad;
-
-	return buffer;
-
-bad:
-	dm_free(buffer);
-	return_NULL;
-}
-
-int read_tags(struct dm_pool *mem, struct dm_list *tagsl, const struct dm_config_value *cv)
-{
-	if (cv->type == DM_CFG_EMPTY_ARRAY)
-		return 1;
-
-	while (cv) {
-		if (cv->type != DM_CFG_STRING) {
-			log_error("Found a tag that is not a string");
-			return 0;
-		}
-
-		if (!str_list_add(mem, tagsl, dm_pool_strdup(mem, cv->v.str)))
-			return_0;
-
-		cv = cv->next;
-	}
-
-	return 1;
-}
--- a/lib/format_text/text_label.c
+++ b/lib/format_text/text_label.c
@@ -418,8 +418,8 @@ static int _text_read(struct labeller *l, struct device *dev, void *buf,
 	if (!(ext_version = xlate32(pvhdr_ext->version)))
 		goto out;

-	log_debug("%s: PV header extension version %" PRIu32 " found",
-		  dev_name(dev), ext_version);
+	log_debug_metadata("%s: PV header extension version %" PRIu32 " found",
+			   dev_name(dev), ext_version);

 	/* Bootloader areas */
 	dlocn_xl = pvhdr_ext->bootloader_areas_xl;
--- a/lib/locking/locking.c
+++ b/lib/locking/locking.c
@@ -119,8 +119,9 @@ int init_locking(int type, struct cmd_context *cmd, int suppress_messages)
 	switch (type) {
 	case 0:
 		init_no_locking(&_locking, cmd, suppress_messages);
-		log_warn("WARNING: Locking disabled. Be careful! "
-			  "This could corrupt your metadata.");
+		log_warn_suppress(suppress_messages,
+			"WARNING: Locking disabled. Be careful! "
+			"This could corrupt your metadata.");
 		return 1;

 	case 1:
--- a/lib/locking/locking.h
+++ b/lib/locking/locking.h
@@ -195,9 +195,10 @@ int check_lvm1_vg_inactive(struct cmd_context *cmd, const char *vgname);

 #define unlock_vg(cmd, vol)	\
 	do { \
-		if (is_real_vg(vol)) \
-			sync_dev_names(cmd); \
-		(void) lock_vol(cmd, vol, LCK_VG_UNLOCK, NULL);	\
+		if (is_real_vg(vol) && !sync_dev_names(cmd)) \
+			stack; \
+		if (!lock_vol(cmd, vol, LCK_VG_UNLOCK, NULL)) \
+			stack;	\
 	} while (0)
 #define unlock_and_release_vg(cmd, vg, vol) \
 	do { \
--- a/Show More
+++ b/Show More