/*
 * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
 * Copyright (C) 2004-2018 Red Hat, Inc. All rights reserved.
 *
 * This file is part of LVM2.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU Lesser General Public License v.2.1.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "lib/misc/lib.h"
#include "dev_manager.h"
#include "lib/misc/lvm-string.h"
#include "fs.h"
#include "lib/config/defaults.h"
#include "lib/metadata/segtype.h"
#include "lib/display/display.h"
#include "lib/commands/toolcontext.h"
#include "lib/activate/targets.h"
#include "lib/config/config.h"
#include "lib/activate/activate.h"
#include "lib/misc/lvm-exec.h"
#include "lib/datastruct/str_list.h"
#include "lib/misc/lvm-signal.h"

#include <limits.h>
#include <dirent.h>

#define MAX_TARGET_PARAMSIZE 50000
#define LVM_UDEV_NOSCAN_FLAG DM_SUBSYSTEM_UDEV_FLAG0
#define CRYPT_TEMP	"CRYPT-TEMP"
#define CRYPT_SUBDEV	"CRYPT-SUBDEV"
#define STRATIS		"stratis-"

typedef enum {
	PRELOAD,
	ACTIVATE,
	DEACTIVATE,
	SUSPEND,
	SUSPEND_WITH_LOCKFS,
	CLEAN
} action_t;

/* This list must match lib/misc/lvm-string.c:build_dm_uuid(). */
static const char *const _uuid_suffix_list[] = { "pool", "cdata", "cmeta", "cvol", "tdata", "tmeta", "vdata", "vpool", "imeta", NULL };

struct dlid_list {
	struct dm_list list;
	const char *dlid;
	const struct logical_volume *lv;
};

struct dev_manager {
	struct dm_pool *mem;

	struct cmd_context *cmd;

	void *target_state;
	uint32_t pvmove_mirror_count;
	int flush_required;
	int activation;			/* building activation tree */
	int suspend;			/* building suspend tree */
	unsigned track_pending_delete;
	unsigned track_pvmove_deps;

	const char *vg_name;
};

struct lv_layer {
	const struct logical_volume *lv;
	const char *old_name;
	int visible_component;
};

int read_only_lv(const struct logical_volume *lv, const struct lv_activate_opts *laopts, const char *layer)
{
	if (layer && lv_is_cow(lv))
		return 0; /* Keep snapshot's COW volume writable */

	if (lv_is_raid_image(lv) || lv_is_raid_metadata(lv))
		return 0; /* Keep RAID SubLVs writable */

	if (!layer) {
		if (lv_is_thin_pool(lv) || lv_is_vdo_pool(lv))
			return 1;
	}

	return (laopts->read_only || !(lv->status & LVM_WRITE));
}

/*
 * Low level device-layer operations.
 *
 * Unless task is DM_DEVICE_TARGET_MSG, also calls dm_task_run().
 */
static struct dm_task *_setup_task_run(int task, struct dm_info *info,
				       const char *name, const char *uuid,
				       uint32_t *event_nr,
				       uint32_t major, uint32_t minor,
				       int with_open_count,
				       int with_flush,
				       int query_inactive)
{
	char vsn[80];
	unsigned maj, min;
	struct dm_task *dmt;

	if (!(dmt = dm_task_create(task)))
		return_NULL;

	if (name && !dm_task_set_name(dmt, name))
		goto_out;

	if (uuid && *uuid && !dm_task_set_uuid(dmt, uuid))
		goto_out;

	if (event_nr && !dm_task_set_event_nr(dmt, *event_nr))
		goto_out;

	if (major && !dm_task_set_major_minor(dmt, major, minor, 1))
		goto_out;

	if (activation_checks() && !dm_task_enable_checks(dmt))
		goto_out;

	if (query_inactive && !dm_task_query_inactive_table(dmt)) {
		log_error("Failed to set query_inactive_table.");
		goto out;
	}

	if (!with_open_count && !dm_task_no_open_count(dmt))
		log_warn("WARNING: Failed to disable open_count.");

	if (!with_flush && !dm_task_no_flush(dmt))
		log_warn("WARNING: Failed to set no_flush.");

	switch (task) {
	case DM_DEVICE_TARGET_MSG:
		return dmt; /* TARGET_MSG needs more local tweaking before task_run() */
	case DM_DEVICE_LIST:
		/* Use 'newuuid' only with DM version that supports it */
		if (driver_version(vsn, sizeof(vsn)) &&
		    (sscanf(vsn, "%u.%u", &maj, &min) == 2) &&
		    (maj == 4 ? min >= 19 : maj > 4) &&
		    !dm_task_set_newuuid(dmt, " ")) /* new uuid has no meaning here */
			log_warn("WARNING: Failed to query uuid with LIST.");
		break;
	default:
		break;
	}

	if (!dm_task_run(dmt))
		goto_out;

	if (info && !dm_task_get_info(dmt, info))
		goto_out;

	return dmt;
out:
	dm_task_destroy(dmt);

	return NULL;
}

/* Read info from DM VDO 'stats' message */
static int _vdo_pool_message_stats(struct dm_pool *mem,
				   const struct logical_volume *lv,
				   struct lv_status_vdo *status)
{
	const char *response;
	const char *dlid;
	struct dm_task *dmt = NULL;
	int r = 0;
	unsigned i;
	const char *p;
	struct vdo_msg_elem {
		const char *name;
		uint64_t *val;
	} const vme[] = { /* list of properties lvm2 wants to parse */
		{ "dataBlocksUsed", &status->data_blocks_used },
		{ "logicalBlocksUsed", &status->logical_blocks_used }
	};

	for (i = 0; i < DM_ARRAY_SIZE(vme); ++i)
		*vme[i].val = ULLONG_MAX;

	if (!(dlid = build_dm_uuid(mem, lv, lv_layer(lv))))
		return_0;

	if (!(dmt = _setup_task_run(DM_DEVICE_TARGET_MSG, NULL, NULL, dlid, 0, 0, 0, 0, 0, 0)))
		return_0;

	if (!dm_task_set_message(dmt, "stats"))
		goto_out;

	if (!dm_task_run(dmt))
		goto_out;

	log_debug_activation("Checking VDO pool stats message for LV %s.",
			     display_lvname(lv));

	if ((response = dm_task_get_message_response(dmt))) {
		for (i = 0; i < DM_ARRAY_SIZE(vme); ++i) {
			errno = 0;
			if (!(p = strstr(response, vme[i].name)) ||
			    !(p = strchr(p, ':')) ||
			    ((*vme[i].val = strtoul(p + 1, NULL, 10)) == ULLONG_MAX) || errno) {
				log_debug("Cannot parse %s in VDO DM stats message.", vme[i].name);
				*vme[i].val = ULLONG_MAX;
				goto out;
			}
			if (*vme[i].val != ULLONG_MAX)
				log_debug("VDO property %s = " FMTu64, vme[i].name, *vme[i].val);
		}
	}

	r = 1;
out:
	dm_task_destroy(dmt);

	return r;
}

static int _get_segment_status_from_target_params(const char *target_name,
						  const char *params,
						  const struct dm_info *dminfo,
						  struct lv_seg_status *seg_status)
{
	const struct lv_segment *seg = seg_status->seg;
	const struct segment_type *segtype = seg->segtype;

	seg_status->type = SEG_STATUS_UNKNOWN; /* Parsing failed */

	/* Switch to snapshot segtype status logic for merging origin */
	/* This is 'dynamic' decision, both states are valid */
	if (lv_is_merging_origin(seg->lv)) {
		if (!strcmp(target_name, TARGET_NAME_SNAPSHOT_ORIGIN)) {
			seg_status->type = SEG_STATUS_NONE;
			return 1; /* Merge has not yet started */
		}

		if (!strcmp(target_name, TARGET_NAME_SNAPSHOT_MERGE) &&
		    !(segtype = get_segtype_from_string(seg->lv->vg->cmd, TARGET_NAME_SNAPSHOT)))
			return_0;

		/* Merging, parse 'snapshot' status of merge progress */
	}

	if (!params) {
		log_warn("WARNING: Cannot find matching %s segment for %s.",
			 segtype->name, display_lvname(seg_status->seg->lv));
		return 0;
	}

	/* Validate target_name segtype from DM table with lvm2 metadata segtype */
	if (!lv_is_locked(seg->lv) &&
	    strcmp(segtype->name, target_name) &&
	    /* If kernel's type isn't an exact match is it compatible? */
	    (!segtype->ops->target_status_compatible ||
	     !segtype->ops->target_status_compatible(target_name))) {
		log_warn("WARNING: Detected %s segment type does not match expected type %s for %s.",
			 target_name, segtype->name, display_lvname(seg_status->seg->lv));
		return 0;
	}

	/* TODO: move into segtype method */
	if (segtype_is_cache(segtype)) {
		if (!dm_get_status_cache(seg_status->mem, params, &(seg_status->cache)))
			return_0;
		seg_status->type = SEG_STATUS_CACHE;
	} else if (segtype_is_raid(segtype)) {
		if (!dm_get_status_raid(seg_status->mem, params, &seg_status->raid))
			return_0;
		seg_status->type = SEG_STATUS_RAID;
	} else if (segtype_is_thin_volume(segtype)) {
		if (!dm_get_status_thin(seg_status->mem, params, &seg_status->thin))
			return_0;
		seg_status->type = SEG_STATUS_THIN;
	} else if (segtype_is_thin_pool(segtype)) {
		if (!dm_get_status_thin_pool(seg_status->mem, params, &seg_status->thin_pool))
			return_0;
		seg_status->type = SEG_STATUS_THIN_POOL;
	} else if (segtype_is_snapshot(segtype)) {
		if (!dm_get_status_snapshot(seg_status->mem, params, &seg_status->snapshot))
			return_0;
		seg_status->type = SEG_STATUS_SNAPSHOT;
	} else if (segtype_is_vdo_pool(segtype)) {
		if (!_vdo_pool_message_stats(seg_status->mem, seg->lv, &seg_status->vdo_pool))
			stack;
		if (!parse_vdo_pool_status(seg_status->mem, seg->lv, params, dminfo, &seg_status->vdo_pool))
			return_0;
		seg_status->type = SEG_STATUS_VDO_POOL;
	} else if (segtype_is_writecache(segtype)) {
		if (!dm_get_status_writecache(seg_status->mem, params, &(seg_status->writecache)))
			return_0;
		seg_status->type = SEG_STATUS_WRITECACHE;
	} else if (segtype_is_integrity(segtype)) {
		if (!dm_get_status_integrity(seg_status->mem, params, &(seg_status->integrity)))
			return_0;
		seg_status->type = SEG_STATUS_INTEGRITY;
	} else
		/*
		 * TODO: Add support for other segment types too!
		 * Status not supported.
		 */
		seg_status->type = SEG_STATUS_NONE;

	return 1;
}

typedef enum {
	INFO,	/* DM_DEVICE_INFO ioctl */
	STATUS,	/* DM_DEVICE_STATUS ioctl */
} info_type_t;

/* Return length of segment depending on type and reshape_len */
static uint32_t _seg_len(const struct lv_segment *seg)
{
	uint32_t reshape_len = seg_is_raid(seg) ? ((seg->area_count - seg->segtype->parity_devs) * seg->reshape_len) : 0;

	return seg->len - reshape_len;
}

static int _info_run(const char *dlid, struct dm_info *dminfo,
		     uint32_t *read_ahead,
		     struct lv_seg_status *seg_status,
		     const char *name_check,
		     int with_open_count, int with_read_ahead,
		     uint32_t major, uint32_t minor)
{
	int r = 0;
	struct dm_task *dmt;
	int dmtask;
	int with_flush; /* TODO: arg for _info_run */
	void *target = NULL;
	uint64_t target_start, target_length, start, extent_size, length, length_crop = 0;
	char *target_name, *target_params;
	const char *devname;

	if (seg_status) {
		dmtask = DM_DEVICE_STATUS;
		with_flush = 0;
	} else {
		dmtask = DM_DEVICE_INFO;
		with_flush = 1; /* doesn't really matter */
	}

	if (!(dmt = _setup_task_run(dmtask, dminfo, NULL, dlid, 0, major, minor,
				    with_open_count, with_flush, 0)))
		return_0;

	if (name_check && dminfo->exists &&
	    (devname = dm_task_get_name(dmt)) &&
	    (strcmp(name_check, devname) != 0))
		dminfo->exists = 0; /* mismatching name -> device does not exist */

	if (with_read_ahead && read_ahead && dminfo->exists) {
		if (!dm_task_get_read_ahead(dmt, read_ahead))
			goto_out;
	} else if (read_ahead)
		*read_ahead = DM_READ_AHEAD_NONE;

	/* Query status only for active device */
	if (seg_status && dminfo->exists) {
		extent_size = length = seg_status->seg->lv->vg->extent_size;
		start = extent_size * seg_status->seg->le;
		length *= _seg_len(seg_status->seg);

		/* Uses max DM_THIN_MAX_METADATA_SIZE sectors for metadata device */
		if (lv_is_thin_pool_metadata(seg_status->seg->lv) &&
		    (length > DM_THIN_MAX_METADATA_SIZE))
			length_crop = DM_THIN_MAX_METADATA_SIZE;

		/* Uses virtual size with headers for VDO pool device */
		if (lv_is_vdo_pool(seg_status->seg->lv))
			length = get_vdo_pool_virtual_size(seg_status->seg);

		if (lv_is_integrity(seg_status->seg->lv))
			length = seg_status->seg->integrity_data_sectors;

		do {
			target = dm_get_next_target(dmt, target, &target_start,
						    &target_length, &target_name, &target_params);

			if ((start == target_start) &&
			    ((length == target_length) ||
			     ((lv_is_vdo_pool(seg_status->seg->lv)) && /* should fit within extent size */
			      (length < target_length) && ((length + extent_size) > target_length)) ||
			     (length_crop && (length_crop == target_length))))
				break; /* Keep target_params when matching segment is found */

			target_params = NULL; /* Marking this target_params unusable */
		} while (target);

		if (!target_name ||
		    !_get_segment_status_from_target_params(target_name, target_params, dminfo, seg_status))
			stack;
	}

	r = 1;
out:
	dm_task_destroy(dmt);

	return r;
}

/*
 * _ignore_blocked_mirror_devices
 * @cmd
 * @dev
 * @start
 * @length
 * @mirror_status_str
 *
 * When a DM 'mirror' target is created with 'block_on_error' or
 * 'handle_errors', it will block I/O if there is a device failure
 * until the mirror is reconfigured. Thus, LVM should never attempt
 * to read labels from a mirror that has a failed device. (LVM
 * commands are issued to repair mirrors; and if LVM is blocked
 * attempting to read a mirror, a circular dependency would be created.)
 *
 * This function is a slimmed-down version of lib/mirror/mirrored.c:
 * _mirrored_transient_status().
 *
 * If a failed device is detected in the status string, then it must be
 * determined if 'block_on_error' or 'handle_errors' was used when
 * creating the mirror. This info can only be determined from the mirror
 * table. The 'dev', 'start', 'length' trio allow us to correlate the
 * 'mirror_status_str' with the correct device table in order to check
 * for blocking.
 *
 * Returns: 1 if mirror should be ignored, 0 if safe to use
 */
static int _ignore_blocked_mirror_devices(struct cmd_context *cmd,
					  struct device *dev,
					  uint64_t start, uint64_t length,
					  char *mirror_status_str)
{
	struct dm_pool *mem;
	struct dm_status_mirror *sm;
	unsigned i, check_for_blocking = 0;
	uint64_t s, l;
	char *p, *params, *target_type = NULL;
	void *next = NULL;
	struct dm_task *dmt = NULL;
	int r = 0;
	char fake_dev_name[16];
	struct device fake_dev = { .fd = 0 };
	struct dm_str_list *alias;
	if (!(mem = dm_pool_create("blocked_mirrors", 128)))
		return_0;
	if (!dm_get_status_mirror(mem, mirror_status_str, &sm))
		goto_out;

	for (i = 0; i < sm->dev_count; ++i)
		if (sm->devs[i].health != DM_STATUS_MIRROR_ALIVE) {
			log_debug_activation("%s: Mirror image %d marked as failed.",
					     dev_name(dev), i);
			check_for_blocking = 1;
		}

	if (!check_for_blocking && sm->log_count) {
		if (sm->logs[0].health != DM_STATUS_MIRROR_ALIVE) {
			log_debug_activation("%s: Mirror log device marked as failed.",
					     dev_name(dev));
			check_for_blocking = 1;
		} else {
			dev_init(&fake_dev);
			if (dm_snprintf(fake_dev_name, sizeof(fake_dev_name), "%u:%u",
					sm->logs[0].major, sm->logs[0].minor) < 0)
				goto_out;

			if (!(alias = dm_pool_zalloc(mem, sizeof(*alias))))
				goto_out;

			if (!(alias->str = dm_pool_strdup(mem, fake_dev_name)))
				goto_out;

			dm_list_add(&fake_dev.aliases, &alias->list);
			fake_dev.flags = DEV_REGULAR;
			fake_dev.dev = MKDEV(sm->logs[0].major, sm->logs[0].minor);

			if (dm_device_is_usable(cmd, &fake_dev, (struct dev_usable_check_params)
						{ .check_empty = 1,
						  .check_blocked = 1,
						  .check_suspended = ignore_suspended_devices(),
						  .check_error_target = 1,
						  .check_reserved = 0 }, NULL))
				goto out; /* safe to use */
			stack;
		}
	}

	if (!check_for_blocking) {
		r = 1;
		goto out;
	}
	/*
	 * We avoid another system call if we can, but if a device is
	 * dead, we have no choice but to look up the table too.
	 */
	if (!(dmt = _setup_task_run(DM_DEVICE_TABLE, NULL, NULL, NULL, NULL,
				    MAJOR(dev->dev), MINOR(dev->dev), 0, 1, 0)))
		goto_out;

	do {
		next = dm_get_next_target(dmt, next, &s, &l,
					  &target_type, &params);

		if ((s == start) && (l == length) &&
		    target_type && params) {
			if (strcmp(target_type, TARGET_NAME_MIRROR))
				goto_out;

			if (((p = strstr(params, " block_on_error")) &&
			     (p[15] == '\0' || p[15] == ' ')) ||
			    ((p = strstr(params, " handle_errors")) &&
			     (p[14] == '\0' || p[14] == ' '))) {
				log_debug_activation("%s: I/O blocked to mirror device.",
						     dev_name(dev));
				goto out;
			}
		}
	} while (next);

	r = 1;
out:
	if (dmt)
		dm_task_destroy(dmt);

	dm_pool_destroy(mem);

	return r;
}
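
The strstr() scans above deliberately match " block_on_error" and " handle_errors" with a leading space and check the character after the keyword, so that the feature word is matched whole rather than as a substring of another token. The same whole-word idea can be sketched generically (this helper is hypothetical, not part of LVM2):

```c
#include <string.h>

/*
 * Sketch: return 1 if 'feature' occurs as a whole space-delimited
 * word inside a dm table params string, 0 otherwise.  This mirrors
 * the boundary checks (p[len] == '\0' || p[len] == ' ') used in the
 * mirror-params scan, generalized to any keyword.
 */
int has_feature(const char *params, const char *feature)
{
	const char *p = params;
	size_t len = strlen(feature);

	while ((p = strstr(p, feature))) {
		if ((p == params || p[-1] == ' ') &&
		    (p[len] == '\0' || p[len] == ' '))
			return 1;	/* whole word matched */
		p += len;		/* substring hit; keep searching */
	}
	return 0;
}
```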

static int _device_is_suspended(int major, int minor)
{
	struct dm_task *dmt;
	struct dm_info info;

	if (!(dmt = _setup_task_run(DM_DEVICE_INFO, &info,
				    NULL, NULL, NULL,
				    major, minor, 0, 0, 0)))
		return_0;

	dm_task_destroy(dmt);

	return (info.exists && info.suspended);
}

static int _ignore_suspended_snapshot_component(struct device *dev)
{
	struct dm_task *dmt;
	void *next = NULL;
	char *params, *target_type = NULL;
	uint64_t start, length;
	int major1, minor1, major2, minor2;
	int r = 0;

	if (!(dmt = _setup_task_run(DM_DEVICE_TABLE, NULL,
				    NULL, NULL, NULL,
				    MAJOR(dev->dev), MINOR(dev->dev), 0, 1, 0)))
		return_0;

	do {
		next = dm_get_next_target(dmt, next, &start, &length, &target_type, &params);

		if (!target_type)
			continue;

		if (!strcmp(target_type, TARGET_NAME_SNAPSHOT)) {
			if (!params || sscanf(params, "%d:%d %d:%d", &major1, &minor1, &major2, &minor2) != 4) {
				log_warn("WARNING: Incorrect snapshot table found for %u:%u.",
					 MAJOR(dev->dev), MINOR(dev->dev));
				goto out;
			}
			r = r || _device_is_suspended(major1, minor1) || _device_is_suspended(major2, minor2);
		} else if (!strcmp(target_type, TARGET_NAME_SNAPSHOT_ORIGIN)) {
			if (!params || sscanf(params, "%d:%d", &major1, &minor1) != 2) {
				log_warn("WARNING: Incorrect snapshot-origin table found for %u:%u.",
					 MAJOR(dev->dev), MINOR(dev->dev));
				goto out;
			}
			r = r || _device_is_suspended(major1, minor1);
		}
	} while (next);
out:
	dm_task_destroy(dmt);

	return r;
}

static int _ignore_unusable_thins(struct device *dev)
{
	/* TODO make function for thin testing */
	struct dm_pool *mem;
	struct dm_status_thin_pool *status;
	struct dm_task *dmt = NULL;
	void *next = NULL;
	uint64_t start, length;
	char *target_type = NULL;
	char *params;
	int minor, major;
	int r = 0;

	if (!(mem = dm_pool_create("unusable_thins", 128)))
		return_0;

	if (!(dmt = _setup_task_run(DM_DEVICE_TABLE, NULL, NULL, NULL, NULL,
				    MAJOR(dev->dev), MINOR(dev->dev), 0, 1, 0)))
		goto_out;

	dm_get_next_target(dmt, next, &start, &length, &target_type, &params);

	if (!params || sscanf(params, "%d:%d", &major, &minor) != 2) {
		log_warn("WARNING: Cannot get thin-pool major:minor for thin device %u:%u.",
			 MAJOR(dev->dev), MINOR(dev->dev));
		goto out;
	}
	dm_task_destroy(dmt);

	if (!(dmt = _setup_task_run(DM_DEVICE_STATUS, NULL, NULL, NULL, NULL,
				    major, minor, 0, 0, 0)))
		goto_out;

	dm_get_next_target(dmt, next, &start, &length, &target_type, &params);

	if (!dm_get_status_thin_pool(mem, params, &status))
		goto_out;

	if (status->read_only || status->out_of_data_space) {
		log_warn("WARNING: %s: Thin's thin-pool needs inspection.",
			 dev_name(dev));
		goto out;
	}

	r = 1;
out:
	if (dmt)
		dm_task_destroy(dmt);
	dm_pool_destroy(mem);

	return r;
}

static int _ignore_invalid_snapshot(const char *params)
{
	struct dm_status_snapshot *s;
	struct dm_pool *mem;
	int r = 0;

	if (!(mem = dm_pool_create("invalid snapshots", 128)))
		return_0;

	if (!dm_get_status_snapshot(mem, params, &s))
		stack;
	else
		r = s->invalid;

	dm_pool_destroy(mem);

	return r;
}

static int _ignore_frozen_raid(struct device *dev, const char *params)
{
	struct dm_status_raid *s;
	struct dm_pool *mem;
	int r = 0;

	if (!(mem = dm_pool_create("frozen raid", 128)))
		return_0;

	if (!dm_get_status_raid(mem, params, &s))
		stack;
	else if (s->sync_action && !strcmp(s->sync_action, "frozen")) {
		log_warn("WARNING: %s frozen raid device (%u:%u) needs inspection.",
			 dev_name(dev), MAJOR(dev->dev), MINOR(dev->dev));
		r = 1;
	}

	dm_pool_destroy(mem);

	return r;
}

static int _is_usable_uuid(const struct device *dev, const char *name, const char *uuid, int check_reserved, int check_lv, int *is_lv)
{
	char *vgname, *lvname, *layer;
	char vg_name[NAME_LEN];

	if (!check_reserved && !check_lv)
		return 1;

	if (!strncmp(uuid, UUID_PREFIX, sizeof(UUID_PREFIX) - 1)) { /* with LVM- prefix */
		if (check_reserved) {
			/* Check internal lvm devices */
			if (strlen(uuid) > (sizeof(UUID_PREFIX) + 2 * ID_LEN)) { /* 68 with suffix */
				log_debug_activation("%s: Reserved uuid %s on internal LV device %s not usable.",
						     dev_name(dev), uuid, name);
				return 0;
			}

			/* Recognize some older reserved LVs just from the LV name (snapshot, pvmove...) */
			vgname = vg_name;
			if (!_dm_strncpy(vg_name, name, sizeof(vg_name)) ||
			    !dm_split_lvm_name(NULL, NULL, &vgname, &lvname, &layer))
				return_0;

			/* FIXME: fails to handle dev aliases i.e. /dev/dm-5, replace with UUID suffix */
			if (lvname && (is_reserved_lvname(lvname) || *layer)) {
				log_debug_activation("%s: Reserved internal LV device %s/%s%s%s not usable.",
						     dev_name(dev), vgname, lvname, *layer ? "-" : "", layer);
				return 0;
			}
		}

		if (check_lv) {
			/* Skip LVs */
			if (is_lv)
				*is_lv = 1;
			return 0;
		}
	}

	if (check_reserved &&
	    (!strncmp(uuid, CRYPT_TEMP, sizeof(CRYPT_TEMP) - 1) ||
	     !strncmp(uuid, CRYPT_SUBDEV, sizeof(CRYPT_SUBDEV) - 1) ||
	     !strncmp(uuid, STRATIS, sizeof(STRATIS) - 1))) {
		/* Skip private crypto devices */
		log_debug_activation("%s: Reserved uuid %s on %s device %s not usable.",
				     dev_name(dev), uuid,
				     uuid[0] == 'C' ? "crypto" : "stratis",
				     name);
		return 0;
	}

	return 1;
}
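
The reserved-UUID filtering in _is_usable_uuid() boils down to prefix tests against the CRYPT_TEMP, CRYPT_SUBDEV and STRATIS strings defined at the top of this file, using the strncmp(..., sizeof(PREFIX) - 1) idiom so the terminating NUL is excluded. A minimal standalone sketch of just that prefix test (string literals inlined for the example):

```c
#include <string.h>

/*
 * Sketch: classify dm UUIDs that _is_usable_uuid() treats as
 * reserved private devices (temporary cryptsetup devices and
 * stratis-managed sub-devices).  sizeof("...") - 1 is the literal's
 * length without the trailing NUL, so this is a pure prefix match.
 */
int uuid_is_reserved(const char *uuid)
{
	return !strncmp(uuid, "CRYPT-TEMP", sizeof("CRYPT-TEMP") - 1) ||
	       !strncmp(uuid, "CRYPT-SUBDEV", sizeof("CRYPT-SUBDEV") - 1) ||
	       !strncmp(uuid, "stratis-", sizeof("stratis-") - 1);
}
```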
/*
 * dm_device_is_usable
 * @dev
 * @check
 * @is_lv
 *
 * A device is considered not usable if it is:
 *     1) An empty device (no targets)
 *     2) A blocked mirror (i.e. a mirror with a failure and block_on_error set)
 *     3) ignore_suspended_devices is set and
 *        a) the device is suspended
 *        b) it is a snapshot origin
 *     4) an error target
 *     5) the LV name is a reserved name.
 *
 * Returns: 1 if usable, 0 otherwise
 */
int dm_device_is_usable(struct cmd_context *cmd, struct device *dev,
			struct dev_usable_check_params check, int *is_lv)
{
	struct dm_task *dmt;
	struct dm_info info;
	const char *name, *uuid;
	const struct dm_active_device *dm_dev;
	uint64_t start, length;
	char *target_type = NULL;
	char *params;
	void *next = NULL;
	int only_error_or_zero_target = 1;
	int r = 0;

	if (dm_devs_cache_use() &&
	    /* With cache we can avoid status calls for unusable UUIDs */
	    (dm_dev = dm_devs_cache_get_by_devno(cmd, dev->dev)) &&
	    !_is_usable_uuid(dev, dm_dev->name, dm_dev->uuid, check.check_reserved, check.check_lv, is_lv))
		return 0;

	if (!(dmt = _setup_task_run(DM_DEVICE_STATUS, &info, NULL, NULL, NULL,
				    MAJOR(dev->dev), MINOR(dev->dev), 0, 0, 0)))
		return_0;

	if (!info.exists)
		goto out;

	name = dm_task_get_name(dmt);
	uuid = dm_task_get_uuid(dmt);

	if (check.check_empty && !info.target_count) {
		log_debug_activation("%s: Empty device %s not usable.", dev_name(dev), name);
		goto out;
	}

	if (check.check_suspended && info.suspended) {
		log_debug_activation("%s: Suspended device %s not usable.", dev_name(dev), name);
		goto out;
	}

	if (uuid &&
	    !_is_usable_uuid(dev, name, uuid, check.check_reserved, check.check_lv, is_lv))
		goto out;

	/* FIXME Also check for mpath no paths */
	do {
		next = dm_get_next_target(dmt, next, &start, &length,
					  &target_type, &params);

		if (!target_type)
			continue;

		if (check.check_blocked && !strcmp(target_type, TARGET_NAME_MIRROR)) {
			if (ignore_lvm_mirrors()) {
				log_debug_activation("%s: Scanning mirror devices is disabled.", dev_name(dev));
				goto out;
			}

			if (!_ignore_blocked_mirror_devices(cmd, dev, start,
							    length, params)) {
				log_debug_activation("%s: Mirror device %s not usable.",
						     dev_name(dev), name);
				goto out;
			}
		}

		/*
		 * FIXME: Snapshot origin could be sitting on top of a mirror
		 * which could be blocking I/O. We should add a check for the
		 * stack here and see if there's blocked mirror underneath.
		 * Currently, mirrors used as origin or snapshot is not
		 * supported anymore and in general using mirrors in a stack
		 * is disabled by default (with a warning that if enabled,
		 * it could cause various deadlocks).
		 * Similar situation can happen with RAID devices where
		 * a RAID device can be snapshotted.
		 * If one of the RAID legs are down and we're doing
		 * lvconvert --repair, there's a time period in which
		 * snapshot components are (besides other devs) suspended.
		 * See also https://bugzilla.redhat.com/show_bug.cgi?id=1219222
		 * for an example where this causes problems.
		 *
		 * This is a quick check for now, but replace it with more
		 * robust and better check that would check the stack
		 * correctly, not just snapshots but any combination possible
		 * in a stack - use proper dm tree to check this instead.
		 */
		if (check.check_suspended &&
		    (!strcmp(target_type, TARGET_NAME_SNAPSHOT) || !strcmp(target_type, TARGET_NAME_SNAPSHOT_ORIGIN)) &&
		    _ignore_suspended_snapshot_component(dev)) {
			log_debug_activation("%s: %s device %s not usable.", dev_name(dev), target_type, name);
			goto out;
		}

		if (!strcmp(target_type, TARGET_NAME_SNAPSHOT) &&
		    _ignore_invalid_snapshot(params)) {
			log_debug_activation("%s: Invalid %s device %s not usable.", dev_name(dev), target_type, name);
			goto out;
		}

		if (!strncmp(target_type, TARGET_NAME_RAID, 4) && _ignore_frozen_raid(dev, params)) {
			log_debug_activation("%s: Frozen %s device %s not usable.",
					     dev_name(dev), target_type, name);
			goto out;
		}

		/* TODO: extend check struct ? */
		if (!strcmp(target_type, TARGET_NAME_THIN) &&
		    !_ignore_unusable_thins(dev)) {
			log_debug_activation("%s: %s device %s not usable.", dev_name(dev), target_type, name);
			goto out;
		}

		if (only_error_or_zero_target &&
		    strcmp(target_type, TARGET_NAME_ERROR) &&
		    strcmp(target_type, TARGET_NAME_ZERO))
			only_error_or_zero_target = 0;
	} while (next);

	/* Skip devices consisting entirely of error or zero targets. */
	/* FIXME Deal with device stacked above error targets? */
	if (check.check_error_target && only_error_or_zero_target) {
		log_debug_activation("%s: Error device %s not usable.",
				     dev_name(dev), name);
		goto out;
	}

	/* FIXME Also check dependencies? */

	r = 1;
out:
	dm_task_destroy(dmt);

	return r;
}
/* Read UUID from a given DM device into uuid_buf */
int devno_dm_uuid(struct cmd_context *cmd, int major, int minor,
		  char *uuid_buf, size_t uuid_buf_size)
{
	struct dm_task *dmt;
	struct dm_info info;
	const struct dm_active_device *dm_dev;
	const char *uuid;
	int r = 0;

	if (major != cmd->dev_types->device_mapper_major)
		return 0;

	if (dm_devs_cache_use()) {
		if ((dm_dev = dm_devs_cache_get_by_devno(cmd, MKDEV(major, minor)))) {
			dm_strncpy(uuid_buf, dm_dev->uuid, uuid_buf_size);
			return 1;
		}
		uuid_buf[0] = 0;
		return 0;
	}

	if (!(dmt = _setup_task_run(DM_DEVICE_INFO, &info, NULL, NULL, NULL,
				    major, minor, 0, 0, 0)))
		return_0;

	if (info.exists && (uuid = dm_task_get_uuid(dmt)))
		r = dm_strncpy(uuid_buf, uuid, uuid_buf_size);

	dm_task_destroy(dmt);

	return r;
}

int dev_dm_uuid(struct cmd_context *cmd, struct device *dev,
		char *uuid_buf, size_t uuid_buf_size)
{
	return devno_dm_uuid(cmd, MAJOR(dev->dev), MINOR(dev->dev),
			     uuid_buf, uuid_buf_size);
}
/*
 * If active LVs were activated by a version of LVM2 before 2.02.00 we must
 * perform additional checks to find them because they do not have the LVM-
 * prefix on their dm uuids.
 * As of 2.02.150, we've chosen to disable this compatibility arbitrarily if
 * we're running kernel version 3 or above.
 */
#define MIN_KERNEL_MAJOR 3

static int _original_uuid_format_check_required(struct cmd_context *cmd)
{
	static int _kernel_major = 0;

	if (!_kernel_major) {
		if ((sscanf(cmd->kernel_vsn, "%d", &_kernel_major) == 1) &&
		    (_kernel_major >= MIN_KERNEL_MAJOR))
			log_debug_activation("Skipping checks for old devices without " UUID_PREFIX
					     " dm uuid prefix (kernel vsn %d >= %d).",
					     _kernel_major, MIN_KERNEL_MAJOR);
		else
			_kernel_major = -1;
	}

	return (_kernel_major == -1);
}
static int _info(struct cmd_context *cmd,
		 const char *name, const char *dlid,
		 int with_open_count, int with_read_ahead, int with_name_check,
		 struct dm_info *dminfo, uint32_t *read_ahead,
		 struct lv_seg_status *seg_status)
{
	char old_style_dlid[sizeof(UUID_PREFIX) + 2 * ID_LEN];
	const char *suffix, *suffix_position;
	const char *name_check = (with_name_check) ? name : NULL;
	unsigned i = 0;

	log_debug_activation("Getting device info for %s [%s].", name, dlid);

	/* Check for dlid */
	if (!_info_run(dlid, dminfo, read_ahead, seg_status, name_check,
		       with_open_count, with_read_ahead, 0, 0))
		return_0;

	if (dminfo->exists)
		return 1;

	/* Check for original version of dlid before the suffixes got added in 2.02.106 */
	if ((suffix_position = strrchr(dlid, '-'))) {
		while ((suffix = _uuid_suffix_list[i++])) {
			if (strcmp(suffix_position + 1, suffix))
				continue;

			dm_strncpy(old_style_dlid, dlid, sizeof(old_style_dlid));

			if (!_info_run(old_style_dlid, dminfo, read_ahead, seg_status,
				       name_check, with_open_count, with_read_ahead,
				       0, 0))
				return_0;

			if (dminfo->exists)
				return 1;
		}
	}

	/* Must we still check for the pre-2.02.00 dm uuid format? */
	if (!_original_uuid_format_check_required(cmd))
		return 1;

	/* Check for dlid before UUID_PREFIX was added */
	if (!_info_run(dlid + sizeof(UUID_PREFIX) - 1, dminfo, read_ahead, seg_status,
		       name_check, with_open_count, with_read_ahead, 0, 0))
		return_0;

	return 1;
}
int dev_manager_remove_dm_major_minor(uint32_t major, uint32_t minor)
{
	struct dm_task *dmt;
	int r = 0;

	log_verbose("Removing dm dev %u:%u", major, minor);

	if (!(dmt = dm_task_create(DM_DEVICE_REMOVE)))
		return_0;

	if (!dm_task_set_major(dmt, major) || !dm_task_set_minor(dmt, minor)) {
		log_error("Failed to set device number for remove %u:%u", major, minor);
		goto out;
	}

	r = dm_task_run(dmt);
out:
	dm_task_destroy(dmt);

	return r;
}
static int _info_by_dev(uint32_t major, uint32_t minor, struct dm_info *info)
{
	return _info_run(NULL, info, NULL, NULL, NULL, 0, 0, major, minor);
}
int dev_manager_check_prefix_dm_major_minor(uint32_t major, uint32_t minor, const char *prefix)
{
	struct dm_task *dmt;
	const char *uuid;
	int r = 1;

	if (!(dmt = _setup_task_run(DM_DEVICE_INFO, NULL, NULL, NULL, 0, major, minor, 0, 0, 0)))
		return_0;

	if (!(uuid = dm_task_get_uuid(dmt)) || strncasecmp(uuid, prefix, strlen(prefix)))
		r = 0;

	dm_task_destroy(dmt);

	return r;
}
/*
 * Get a list of active dm devices from the kernel.
 * The 'devs' list contains a struct dm_active_device.
 */
int dev_manager_get_dm_active_devices(const char *prefix, struct dm_list **devs, unsigned *devs_features)
{
	struct dm_task *dmt;
	int r = 1;

	if (!(dmt = _setup_task_run(DM_DEVICE_LIST, NULL, NULL, NULL, 0, 0, 0, 0, 0, 0)))
		return_0;

	if (!dm_task_get_device_list(dmt, devs, devs_features)) {
		r = 0;
		goto_out;
	}
out:
	dm_task_destroy(dmt);

	return r;
}
int dev_manager_info(struct cmd_context *cmd,
		     const struct logical_volume *lv, const char *layer,
		     int with_open_count, int with_read_ahead, int with_name_check,
		     struct dm_info *dminfo, uint32_t *read_ahead,
		     struct lv_seg_status *seg_status)
{
	char old_style_dlid[sizeof(UUID_PREFIX) + 2 * ID_LEN];
	char *dlid, *name;
	int r = 0;

	if (!(name = dm_build_dm_name(cmd->mem, lv->vg->name, lv->name, layer)))
		return_0;

	if (!(dlid = build_dm_uuid(cmd->mem, lv, layer)))
		goto_out;

	dm_strncpy(old_style_dlid, dlid, sizeof(old_style_dlid));

	if (dm_devs_cache_use() &&
	    !dm_devs_cache_get_by_uuid(cmd, dlid) &&
	    !dm_devs_cache_get_by_uuid(cmd, old_style_dlid)) {
		log_debug("Cached as inactive %s.", name);
		if (dminfo)
			memset(dminfo, 0, sizeof(*dminfo));
		r = 1;
		goto out;
	}

	if (!(r = _info(cmd, name, dlid,
			with_open_count, with_read_ahead, with_name_check,
			dminfo, read_ahead, seg_status)))
		stack;
out:
	dm_pool_free(cmd->mem, name);

	return r;
}
static struct dm_tree_node *_cached_dm_tree_node(struct dm_pool *mem,
						 struct dm_tree *dtree,
						 const struct logical_volume *lv,
						 const char *layer)
{
	struct dm_tree_node *dnode;
	char *dlid;

	if (!(dlid = build_dm_uuid(mem, lv, layer)))
		return_NULL;

	dnode = dm_tree_find_node_by_uuid(dtree, dlid);

	dm_pool_free(mem, dlid);

	return dnode;
}
static const struct dm_info *_cached_dm_info(struct dm_pool *mem,
					     struct dm_tree *dtree,
					     const struct logical_volume *lv,
					     const char *layer)
{
	const struct dm_tree_node *dnode;
	const struct dm_info *dinfo = NULL;

	if (!(dnode = _cached_dm_tree_node(mem, dtree, lv, layer)))
		return NULL;

	if (!(dinfo = dm_tree_node_get_info(dnode))) {
		log_warn("WARNING: Cannot get info from tree node for %s.",
			 display_lvname(lv));
		return NULL;
	}

	if (!dinfo->exists)
		dinfo = NULL;

	return dinfo;
}
int lv_has_target_type(struct dm_pool *mem, const struct logical_volume *lv,
		       const char *layer, const char *target_type)
{
	int r = 0;
	char *dlid;
	struct dm_task *dmt;
	struct dm_info info;
	void *next = NULL;
	uint64_t start, length;
	char *type = NULL;
	char *params = NULL;

	if (!(dlid = build_dm_uuid(mem, lv, layer)))
		return_0;

	if (!(dmt = _setup_task_run(DM_DEVICE_STATUS, &info, NULL, dlid, 0, 0, 0, 0, 0, 0)))
		goto_bad;

	if (!info.exists)
		goto_out;

	/* If there is a preloaded table, use that in preference. */
	if (info.inactive_table) {
		dm_task_destroy(dmt);

		if (!(dmt = _setup_task_run(DM_DEVICE_STATUS, &info, NULL, dlid, 0, 0, 0, 0, 0, 1)))
			goto_bad;

		if (!info.exists || !info.inactive_table)
			goto_out;
	}

	do {
		next = dm_get_next_target(dmt, next, &start, &length,
					  &type, &params);
		if (type && !strncmp(type, target_type, strlen(target_type))) {
			r = 1;
			break;
		}
	} while (next);
out:
	dm_task_destroy(dmt);
bad:
	dm_pool_free(mem, dlid);

	return r;
}
static int _lv_has_thin_device_id(struct dm_pool *mem, const struct logical_volume *lv,
				  const char *layer, unsigned device_id)
{
	char *dlid;
	struct dm_task *dmt;
	struct dm_info info;
	void *next = NULL;
	uint64_t start, length;
	char *type = NULL;
	char *params = NULL;
	unsigned id = ~0;

	if (!(dlid = build_dm_uuid(mem, lv, layer)))
		return_0;

	if (!(dmt = _setup_task_run(DM_DEVICE_TABLE, &info, NULL, dlid, 0, 0, 0, 0, 1, 0)))
		goto_bad;

	if (!info.exists)
		goto_out;

	/* If there is a preloaded table, use that in preference. */
	if (info.inactive_table) {
		dm_task_destroy(dmt);

		if (!(dmt = _setup_task_run(DM_DEVICE_TABLE, &info, NULL, dlid, 0, 0, 0, 0, 1, 1)))
			goto_bad;

		if (!info.exists || !info.inactive_table)
			goto_out;
	}

	(void) dm_get_next_target(dmt, next, &start, &length, &type, &params);

	if (!type || strcmp(type, TARGET_NAME_THIN))
		goto_out;

	if (!params || sscanf(params, "%*u:%*u %u", &id) != 1)
		goto_out;

	log_debug_activation("%soaded thin volume %s with id %u is %smatching id %u.",
			     info.inactive_table ? "Prel" : "L",
			     display_lvname(lv), id,
			     (device_id != id) ? "not " : "", device_id);
out:
	dm_task_destroy(dmt);
bad:
	dm_pool_free(mem, dlid);

	return (device_id == id);
}
int add_linear_area_to_dtree(struct dm_tree_node *node, uint64_t size, uint32_t extent_size,
			     int use_linear_target, const char *vgname, const char *lvname)
{
	uint32_t page_size;

	/*
	 * Use striped or linear target?
	 */
	if (!use_linear_target) {
		page_size = lvm_getpagesize() >> SECTOR_SHIFT;

		/*
		 * We'll use the extent size as the stripe size.
		 * Extent size and page size are always powers of 2.
		 * The striped target requires that the stripe size is
		 * divisible by the page size.
		 */
		if (extent_size >= page_size) {
			/* Use striped target */
			if (!dm_tree_node_add_striped_target(node, size, extent_size))
				return_0;
			return 1;
		}

		/* Some exotic cases are unsupported by striped. */
		log_warn("WARNING: Using linear target for %s/%s: Striped requires extent size "
			 "(" FMTu32 " sectors) >= page size (" FMTu32 ").",
			 vgname, lvname, extent_size, page_size);
	}

	/*
	 * Use linear target.
	 */
	if (!dm_tree_node_add_linear_target(node, size))
		return_0;

	return 1;
}
static dm_percent_range_t _combine_percent(dm_percent_t a, dm_percent_t b,
					   uint32_t numerator, uint32_t denominator)
{
	if (a == LVM_PERCENT_MERGE_FAILED || b == LVM_PERCENT_MERGE_FAILED)
		return LVM_PERCENT_MERGE_FAILED;

	if (a == DM_PERCENT_INVALID || b == DM_PERCENT_INVALID)
		return DM_PERCENT_INVALID;

	if (a == DM_PERCENT_100 && b == DM_PERCENT_100)
		return DM_PERCENT_100;

	if (a == DM_PERCENT_0 && b == DM_PERCENT_0)
		return DM_PERCENT_0;

	return (dm_percent_range_t) dm_make_percent(numerator, denominator);
}

static int _percent_run(struct dev_manager *dm, const char *name,
			const char *dlid,
			const char *target_type, int wait,
			const struct logical_volume *lv, dm_percent_t *overall_percent,
			uint32_t *event_nr, int fail_if_percent_unsupported,
			int *interrupted)
{
	int r = 0;
	struct dm_task *dmt;
	struct dm_info info;
	void *next = NULL;
	uint64_t start, length;
	char *type = NULL;
	char *params = NULL;
	const struct dm_list *segh = lv ? &lv->segments : NULL;
	struct lv_segment *seg = NULL;
	int first_time = 1;
	dm_percent_t percent = DM_PERCENT_INVALID;
	uint64_t total_numerator = 0, total_denominator = 0;
	struct segment_type *segtype;

	*overall_percent = percent;

	if (!(segtype = get_segtype_from_string(dm->cmd, target_type)))
		return_0;

	if (wait)
		sigint_allow();

	if (!(dmt = _setup_task_run(wait ? DM_DEVICE_WAITEVENT : DM_DEVICE_STATUS, &info,
				    name, dlid, event_nr, 0, 0, 0, 0, 0)))
		goto_bad;

	if (!info.exists)
		goto_out;

	if (event_nr)
		*event_nr = info.event_nr;

	do {
		next = dm_get_next_target(dmt, next, &start, &length, &type,
					  &params);
		if (lv) {
			if (!(segh = dm_list_next(&lv->segments, segh))) {
				log_error("Number of segments in active LV %s "
					  "does not match metadata.",
					  display_lvname(lv));
				goto out;
			}
			seg = dm_list_item(segh, struct lv_segment);
		}

		if (!type || !params)
			continue;

		if (strcmp(type, target_type)) {
			/* If kernel's type isn't an exact match is it compatible? */
			if (!segtype->ops->target_status_compatible ||
			    !segtype->ops->target_status_compatible(type))
				continue;
		}

		if (!segtype->ops->target_percent)
			continue;

		if (!segtype->ops->target_percent(&dm->target_state,
						  &percent, dm->mem,
						  dm->cmd, seg, params,
						  &total_numerator,
						  &total_denominator))
			goto_out;

		if (first_time) {
			*overall_percent = percent;
			first_time = 0;
		} else
			*overall_percent =
				_combine_percent(*overall_percent, percent,
						 total_numerator, total_denominator);
	} while (next);

	if (lv && dm_list_next(&lv->segments, segh)) {
		log_error("Number of segments in active LV %s does not "
			  "match metadata.", display_lvname(lv));
		goto out;
	}

	if (first_time) {
		/* above ->target_percent() was not executed! */
		/* FIXME why return PERCENT_100 et. al. in this case? */
		*overall_percent = DM_PERCENT_100;
		if (fail_if_percent_unsupported)
			goto_out;
	}

	log_debug_activation("LV percent: %s",
			     display_percent(dm->cmd, *overall_percent));
	r = 1;
out:
	dm_task_destroy(dmt);
bad:
	if (wait) {
		sigint_restore();

		if (sigint_caught()) {
			*interrupted = 1;
			return_0;
		}
	}

	return r;
}

static int _percent(struct dev_manager *dm, const char *name, const char *dlid,
		    const char *target_type, int wait,
		    const struct logical_volume *lv, dm_percent_t *percent,
		    uint32_t *event_nr, int fail_if_percent_unsupported)
{
	int interrupted = 0;

	if (dlid && *dlid) {
		if (_percent_run(dm, NULL, dlid, target_type, wait, lv, percent,
				 event_nr, fail_if_percent_unsupported, &interrupted))
			return 1;

		if (!interrupted &&
		    _original_uuid_format_check_required(dm->cmd) &&
		    _percent_run(dm, NULL, dlid + sizeof(UUID_PREFIX) - 1,
				 target_type, wait, lv, percent,
				 event_nr, fail_if_percent_unsupported, &interrupted))
			return 1;
	}

	if (!interrupted && name &&
	    _percent_run(dm, name, NULL, target_type, wait, lv, percent,
			 event_nr, fail_if_percent_unsupported, &interrupted))
		return 1;

	return_0;
}

/* FIXME Merge with the percent function */
int dev_manager_transient(struct dev_manager *dm, const struct logical_volume *lv)
{
	int r = 0;
	struct dm_task *dmt;
	struct dm_info info;
	void *next = NULL;
	uint64_t start, length;
	char *type = NULL;
	char *params = NULL;
	char *dlid = NULL;
	const char *layer = lv_layer(lv);
	const struct dm_list *segh = &lv->segments;
	struct lv_segment *seg = NULL;

	if (!(dlid = build_dm_uuid(dm->mem, lv, layer)))
		return_0;

	if (!(dmt = _setup_task_run(DM_DEVICE_STATUS, &info, NULL, dlid, NULL, 0, 0, 0, 0, 0)))
		return_0;

	if (!info.exists)
		goto_out;

	do {
		next = dm_get_next_target(dmt, next, &start, &length, &type,
					  &params);

		if (!(segh = dm_list_next(&lv->segments, segh))) {
			log_error("Number of segments in active LV %s "
				  "does not match metadata.", display_lvname(lv));
			goto out;
		}
		seg = dm_list_item(segh, struct lv_segment);

		if (!type || !params)
			continue;

		if (!seg) {
			log_error(INTERNAL_ERROR "Segment is not selected.");
			goto out;
		}

		if (seg->segtype->ops->check_transient_status &&
		    !seg->segtype->ops->check_transient_status(dm->mem, seg, params))
			goto_out;

	} while (next);

	if (dm_list_next(&lv->segments, segh)) {
		log_error("Number of segments in active LV %s does not "
			  "match metadata.", display_lvname(lv));
		goto out;
	}

	r = 1;
out:
	dm_task_destroy(dmt);

	return r;
}

/*
 * dev_manager implementation.
 */
struct dev_manager *dev_manager_create(struct cmd_context *cmd,
				       const char *vg_name,
				       unsigned track_pvmove_deps)
{
	struct dm_pool *mem;
	struct dev_manager *dm;

	if (!(mem = dm_pool_create("dev_manager", 16 * 1024)))
		return_NULL;

	if (!(dm = dm_pool_zalloc(mem, sizeof(*dm))))
		goto_bad;

	dm->cmd = cmd;
	dm->mem = mem;
	dm->vg_name = vg_name;

	/*
	 * When we manipulate (normally suspend/resume) the PVMOVE
	 * device directly, there's no need to touch the LVs above.
	 */
	dm->track_pvmove_deps = track_pvmove_deps;

	dm->target_state = NULL;

	dm_udev_set_sync_support(cmd->current_settings.udev_sync);

	return dm;

bad:
	dm_pool_destroy(mem);

	return NULL;
}

void dev_manager_destroy(struct dev_manager *dm)
{
	dm_pool_destroy(dm->mem);
}

void dev_manager_release(void)
{
	dm_lib_release();
}

void dev_manager_exit(void)
{
	dm_lib_exit();
}

int dev_manager_snapshot_percent(struct dev_manager *dm,
				 const struct logical_volume *lv,
				 dm_percent_t *percent)
{
	const struct logical_volume *snap_lv;
	char *name;
	const char *dlid;
	int fail_if_percent_unsupported = 0;

	if (lv_is_merging_origin(lv)) {
		/*
		 * Set 'fail_if_percent_unsupported', otherwise passing
		 * unsupported LV types to _percent will lead to a default
		 * successful return with percent_range as PERCENT_100.
		 * - For a merging origin, this will result in a polldaemon
		 *   that runs infinitely (because completion is PERCENT_0)
		 * - We unfortunately don't yet _know_ if a snapshot-merge
		 *   target is active (activation is deferred if dev is open);
		 *   so we can't short-circuit origin devices based purely on
		 *   existing LVM LV attributes.
		 */
		fail_if_percent_unsupported = 1;
	}

	if (lv_is_merging_cow(lv)) {
		/* must check percent of origin for a merging snapshot */
		snap_lv = origin_from_cow(lv);
	} else
		snap_lv = lv;

	/*
	 * Build a name for the top layer.
	 */
	if (!(name = dm_build_dm_name(dm->mem, snap_lv->vg->name, snap_lv->name, NULL)))
		return_0;

	if (!(dlid = build_dm_uuid(dm->mem, snap_lv, NULL)))
		return_0;

	/*
	 * Try and get some info on this device.
	 */
	if (!_percent(dm, name, dlid, TARGET_NAME_SNAPSHOT, 0, NULL, percent,
		      NULL, fail_if_percent_unsupported))
		return_0;

	/* If the snapshot isn't available, percent will be -1 */
	return 1;
}

/* FIXME Merge with snapshot_percent, auto-detecting target type */
/* FIXME Cope with more than one target */
int dev_manager_mirror_percent(struct dev_manager *dm,
			       const struct logical_volume *lv, int wait,
			       dm_percent_t *percent, uint32_t *event_nr)
{
	char *name;
	const char *dlid;
	const char *target_type = first_seg(lv)->segtype->name;
	const char *layer = lv_layer(lv);

	/*
	 * Build a name for the top layer.
	 */
	if (!(name = dm_build_dm_name(dm->mem, lv->vg->name, lv->name, layer)))
		return_0;

	if (!(dlid = build_dm_uuid(dm->mem, lv, layer)))
		return_0;

	log_debug_activation("Getting device %s status percentage for %s.",
			     target_type, name);

	if (!_percent(dm, name, dlid, target_type, wait, lv, percent, event_nr, 0))
		return_0;

	return 1;
}

int dev_manager_raid_status(struct dev_manager *dm,
			    const struct logical_volume *lv,
			    struct lv_status_raid **status, int *exists)
{
	int r = 0;
	const char *dlid;
	struct dm_task *dmt;
	struct dm_info info;
	uint64_t start, length;
	char *type = NULL;
	char *params = NULL;
	const char *layer = lv_layer(lv);
	struct dm_status_raid *sr;

	*exists = -1;
	if (!(*status = dm_pool_zalloc(dm->mem, sizeof(struct lv_status_raid))))
		return_0;

	if (!(dlid = build_dm_uuid(dm->mem, lv, layer)))
		return_0;

	if (!(dmt = _setup_task_run(DM_DEVICE_STATUS, &info, NULL, dlid, 0, 0, 0, 0, 0, 0)))
		return_0;

	if (!(*exists = info.exists))
		goto out;

	log_debug_activation("Checking raid status for volume %s.",
			     display_lvname(lv));

	dm_get_next_target(dmt, NULL, &start, &length, &type, &params);

	if (!type || strcmp(type, TARGET_NAME_RAID)) {
		log_error("Expected %s segment type but got %s instead.",
			  TARGET_NAME_RAID, type ? type : "NULL");
		goto out;
	}

	/* FIXME Check there's only one target */

	if (!dm_get_status_raid(dm->mem, params, &sr))
		goto_out;

	(*status)->mem = dm->mem; /* User has to destroy this mem pool later */
	(*status)->raid = sr;
	(*status)->in_sync = dm_make_percent(sr->insync_regions, sr->total_regions);

	r = 1;
out:
	dm_task_destroy(dmt);

	return r;
}

int dev_manager_raid_message(struct dev_manager *dm,
			     const struct logical_volume *lv,
			     const char *msg)
{
	int r = 0;
	const char *dlid;
	struct dm_task *dmt;
	const char *layer = lv_layer(lv);

	if (!lv_is_raid(lv)) {
		log_error(INTERNAL_ERROR "%s is not a RAID logical volume.",
			  display_lvname(lv));
		return 0;
	}

	/* These are the supported RAID messages for dm-raid v1.9.0 */
	if (strcmp(msg, "idle") &&
	    strcmp(msg, "frozen") &&
	    strcmp(msg, "resync") &&
	    strcmp(msg, "recover") &&
	    strcmp(msg, "check") &&
	    strcmp(msg, "repair")) {
		log_error(INTERNAL_ERROR "Unknown RAID message: %s.", msg);
		return 0;
	}

	if (!(dlid = build_dm_uuid(dm->mem, lv, layer)))
		return_0;

	if (!(dmt = _setup_task_run(DM_DEVICE_TARGET_MSG, NULL, NULL, dlid, 0, 0, 0, 0, 1, 0)))
		return_0;

	if (!dm_task_set_message(dmt, msg))
		goto_out;

	if (!dm_task_run(dmt))
		goto_out;

	r = 1;
out:
	dm_task_destroy(dmt);

	return r;
}

int dev_manager_writecache_message(struct dev_manager *dm,
				   const struct logical_volume *lv,
				   const char *msg)
{
	int r = 0;
	const char *dlid;
	struct dm_task *dmt;
	const char *layer = lv_layer(lv);

	if (!lv_is_writecache(lv)) {
		log_error(INTERNAL_ERROR "%s is not a writecache logical volume.",
			  display_lvname(lv));
		return 0;
	}

	if (!(dlid = build_dm_uuid(dm->mem, lv, layer)))
		return_0;

	if (!(dmt = _setup_task_run(DM_DEVICE_TARGET_MSG, NULL, NULL, dlid, 0, 0, 0, 0, 1, 0)))
		return_0;

	if (!dm_task_set_message(dmt, msg))
		goto_out;

	if (!dm_task_run(dmt))
		goto_out;

	r = 1;
out:
	dm_task_destroy(dmt);

	return r;
}

int dev_manager_cache_status(struct dev_manager *dm,
			     const struct logical_volume *lv,
			     struct lv_status_cache **status, int *exists)
{
	int r = 0;
	const char *dlid;
	struct dm_task *dmt;
	struct dm_info info;
	uint64_t start, length;
	char *type = NULL;
	char *params = NULL;
	struct dm_status_cache *c;

	*exists = -1;

	if (!(dlid = build_dm_uuid(dm->mem, lv, lv_layer(lv))))
		return_0;

	if (!(dmt = _setup_task_run(DM_DEVICE_STATUS, &info, NULL, dlid, 0, 0, 0, 0, 0, 0)))
		return_0;

	if (!(*exists = info.exists))
		goto out;

	log_debug_activation("Checking status for cache volume %s.",
			     display_lvname(lv));

	dm_get_next_target(dmt, NULL, &start, &length, &type, &params);

	if (!type || strcmp(type, TARGET_NAME_CACHE)) {
		log_error("Expected %s segment type but got %s instead.",
			  TARGET_NAME_CACHE, type ? type : "NULL");
		goto out;
	}

	/*
	 * FIXME:
	 * ->target_percent() API is able to transfer only a single value.
	 * Needs to be able to pass whole structure.
	 */
	if (!dm_get_status_cache(dm->mem, params, &c))
		goto_out;

	if (!(*status = dm_pool_zalloc(dm->mem, sizeof(struct lv_status_cache))))
		goto_out;

	(*status)->mem = dm->mem; /* User has to destroy this mem pool later */
	(*status)->cache = c;

	if (c->fail || c->error) {
		(*status)->data_usage =
			(*status)->metadata_usage =
			(*status)->dirty_usage = DM_PERCENT_INVALID;
	} else {
		(*status)->data_usage = dm_make_percent(c->used_blocks,
							c->total_blocks);
		(*status)->metadata_usage = dm_make_percent(c->metadata_used_blocks,
							    c->metadata_total_blocks);
		(*status)->dirty_usage = (c->used_blocks) ?
			dm_make_percent(c->dirty_blocks,
					c->used_blocks) : DM_PERCENT_0;
	}

	r = 1;
out:
	dm_task_destroy(dmt);

	return r;
}

int dev_manager_thin_pool_status(struct dev_manager *dm,
				 const struct logical_volume *lv, int flush,
				 struct lv_status_thin_pool **status, int *exists)
{
	struct dm_status_thin_pool *dm_status;
	const char *dlid;
	struct dm_task *dmt;
	struct dm_info info;
	uint64_t start, length;
	char *type = NULL;
	char *params = NULL;
	int r = 0;

	*exists = -1;
	if (!(*status = dm_pool_zalloc(dm->mem, sizeof(struct lv_status_thin_pool))))
		return_0;

	/* Build dlid for the thin pool layer */
	if (!(dlid = build_dm_uuid(dm->mem, lv, lv_layer(lv))))
		return_0;

	if (!(dmt = _setup_task_run(DM_DEVICE_STATUS, &info, NULL, dlid, 0, 0, 0, 0, flush, 0)))
		return_0;

	if (!(*exists = info.exists))
		goto out;

	log_debug_activation("Checking thin pool status for LV %s.",
			     display_lvname(lv));

	dm_get_next_target(dmt, NULL, &start, &length, &type, &params);

	if (!type || strcmp(type, TARGET_NAME_THIN_POOL)) {
		log_error("Expected %s segment type but got %s instead.",
			  TARGET_NAME_THIN_POOL, type ? type : "NULL");
		goto out;
	}

	if (!dm_get_status_thin_pool(dm->mem, params, &dm_status))
		goto_out;

	(*status)->mem = dm->mem;
	(*status)->thin_pool = dm_status;

	if (dm_status->fail || dm_status->error) {
		(*status)->data_usage =
			(*status)->metadata_usage = DM_PERCENT_INVALID;
	} else {
		(*status)->data_usage = dm_make_percent(dm_status->used_data_blocks,
							dm_status->total_data_blocks);
		(*status)->metadata_usage = dm_make_percent(dm_status->used_metadata_blocks,
							    dm_status->total_metadata_blocks);
	}

	r = 1;
out:
	dm_task_destroy(dmt);

	return r;
}

int dev_manager_thin_status(struct dev_manager *dm,
			    const struct logical_volume *lv, int flush,
			    struct lv_status_thin **status, int *exists)
{
	struct dm_status_thin *dm_status;
	const char *dlid;
	struct dm_task *dmt;
	struct dm_info info;
	uint64_t start, length;
	char *type = NULL;
	char *params = NULL;
	uint64_t csize;
	int r = 0;

	*exists = -1;
	if (!(*status = dm_pool_zalloc(dm->mem, sizeof(struct lv_status_thin))))
		return_0;

	if (!(dlid = build_dm_uuid(dm->mem, lv, lv_layer(lv))))
		return_0;

	if (!(dmt = _setup_task_run(DM_DEVICE_STATUS, &info, NULL, dlid, 0, 0, 0, 0, flush, 0)))
		return_0;

	if (!(*exists = info.exists))
		goto out;

	log_debug_activation("Checking thin status for LV %s.",
			     display_lvname(lv));

	dm_get_next_target(dmt, NULL, &start, &length, &type, &params);

	if (!type || strcmp(type, TARGET_NAME_THIN)) {
		log_error("Expected %s segment type but got %s instead.",
			  TARGET_NAME_THIN, type ? type : "NULL");
		goto out;
	}

	if (!dm_get_status_thin(dm->mem, params, &dm_status))
		goto_out;

	(*status)->mem = dm->mem;
	(*status)->thin = dm_status;

	if (dm_status->fail)
		(*status)->usage = DM_PERCENT_INVALID;
	else {
		/* Pool allocates whole chunk so round-up to nearest one */
		csize = first_seg(first_seg(lv)->pool_lv)->chunk_size;
		csize = ((lv->size + csize - 1) / csize) * csize;
		if (dm_status->mapped_sectors > csize) {
			log_warn("WARNING: LV %s maps %s while the size is only %s.",
				 display_lvname(lv),
				 display_size(dm->cmd, dm_status->mapped_sectors),
				 display_size(dm->cmd, csize));
			/* Don't show nonsense numbers like i.e. 1000% full */
			dm_status->mapped_sectors = csize;
		}
		(*status)->usage = dm_make_percent(dm_status->mapped_sectors, csize);
	}

	r = 1;
out:
	dm_task_destroy(dmt);

	return r;
}

/*
 * Explore state of running DM table to obtain currently used deviceId
 */
int dev_manager_thin_device_id(struct dev_manager *dm,
			       const struct logical_volume *lv,
			       uint32_t *device_id, int *exists)
{
	const char *dlid;
	struct dm_task *dmt;
	struct dm_info info;
	uint64_t start, length;
	char *params, *target_type = NULL;
	const char *layer = lv_layer(lv);
	int r = 0;

	*exists = -1;

	if (lv_is_merging_origin(lv) && !lv_info(lv->vg->cmd, lv, 1, NULL, 0, 0))
		/* If the merge has already happened, that table
		 * can already be using correct LV without -real layer */
		layer = NULL;

	/* Build dlid for the thin layer */
	if (!(dlid = build_dm_uuid(dm->mem, lv, layer)))
		return_0;

	if (!(dmt = _setup_task_run(DM_DEVICE_TABLE, &info, NULL, dlid, 0, 0, 0, 0, 1, 0)))
		return_0;

	if (!(*exists = info.exists))
		goto out;

	log_debug_activation("Checking device id for LV %s.",
			     display_lvname(lv));

	if (dm_get_next_target(dmt, NULL, &start, &length,
			       &target_type, &params)) {
		log_error("More than one table line found for %s.",
			  display_lvname(lv));
		goto out;
	}

	if (!target_type || strcmp(target_type, TARGET_NAME_THIN)) {
		log_error("Unexpected target type %s found for thin %s.",
			  target_type, display_lvname(lv));
		goto out;
	}

	if (!params || sscanf(params, "%*u:%*u %u", device_id) != 1) {
		log_error("Cannot parse table-like parameters %s for %s.",
			  params, display_lvname(lv));
		goto out;
	}

	r = 1;
out:
	dm_task_destroy(dmt);

	return r;
}

int dev_manager_vdo_pool_status(struct dev_manager *dm,
				const struct logical_volume *lv, int flush,
				struct lv_status_vdo **status, int *exists)
{
	const char *dlid;
	struct dm_info info;
	uint64_t start, length;
	struct dm_task *dmt = NULL;
	char *type = NULL;
	char *params = NULL;
	int r = 0;

	*exists = -1;
	if (!(*status = dm_pool_zalloc(dm->mem, sizeof(struct lv_status_vdo))))
		return_0;

	if (!(dlid = build_dm_uuid(dm->mem, lv, lv_layer(lv))))
		return_0;

	if (!(dmt = _setup_task_run(DM_DEVICE_STATUS, &info, NULL, dlid, 0, 0, 0, 0, flush, 0)))
		return_0;

	if (!(*exists = info.exists))
		goto out;

	log_debug_activation("Checking VDO pool status for LV %s.",
			     display_lvname(lv));

	if (dm_get_next_target(dmt, NULL, &start, &length, &type, &params)) {
		log_error("More than one table line found for %s.",
			  display_lvname(lv));
		goto out;
	}

	if (!type || strcmp(type, TARGET_NAME_VDO)) {
		log_error("Expected %s segment type but got %s instead.",
			  TARGET_NAME_VDO, type ? type : "NULL");
		goto out;
	}

	if (!_vdo_pool_message_stats(dm->mem, lv, *status))
		stack;

	if (!parse_vdo_pool_status(dm->mem, lv, params, &info, *status))
		goto_out;

	(*status)->mem = dm->mem;

	r = 1;
out:
	dm_task_destroy(dmt);

	return r;
}

int dev_manager_vdo_pool_size_config(struct dev_manager *dm,
				     const struct logical_volume *lv,
				     struct vdo_pool_size_config *cfg)
{
	const char *dlid;
	struct dm_info info;
	uint64_t start, length;
	struct dm_task *dmt = NULL;
	char *type = NULL;
	char *params = NULL;
	int r = 0;
	unsigned version = 0;

	memset(cfg, 0, sizeof(*cfg));

	if (!(dlid = build_dm_uuid(dm->mem, lv, lv_layer(lv))))
		return_0;

	if (!(dmt = _setup_task_run(DM_DEVICE_TABLE, &info, NULL, dlid, 0, 0, 0, 0, 0, 0)))
		return_0;

	if (!info.exists)
		goto inactive; /* VDO device is not active, should not happen here... */

	log_debug_activation("Checking VDO pool table line for LV %s.",
			     display_lvname(lv));

	if (dm_get_next_target(dmt, NULL, &start, &length, &type, &params)) {
		log_error("More than one table line found for %s.",
			  display_lvname(lv));
		goto out;
	}

	if (!type || strcmp(type, TARGET_NAME_VDO)) {
		log_error("Expected %s segment type but got %s instead.",
			  TARGET_NAME_VDO, type ? type : "NULL");
		goto out;
	}

	if (sscanf(params, "V%u %*s " FMTu64 " %*u " FMTu32,
		   &version, &cfg->physical_size, &cfg->block_map_cache_size_mb) != 3) {
		log_error("Failed to parse VDO parameters %s for LV %s.",
			  params, display_lvname(lv));
		goto out;
	}

	switch (version) {
	case 2: break;
	case 4: break;
	default: log_warn("WARNING: Unknown VDO table line version %u.", version);
	}

	cfg->virtual_size = length;
	cfg->physical_size *= 8;		/* From 4K unit to 512B */
	cfg->block_map_cache_size_mb /= 256;	/* From 4K unit to MiB */
	cfg->index_memory_size_mb = first_seg(lv)->vdo_params.index_memory_size_mb; /* Preserved */

inactive:
	r = 1;
out:
	dm_task_destroy(dmt);

	return r;
}

/*************************/
/*  NEW CODE STARTS HERE */
/*************************/

static int _dev_manager_lv_mknodes(const struct logical_volume *lv)
{
	char *name;

	if (!(name = dm_build_dm_name(lv->vg->cmd->mem, lv->vg->name,
				      lv->name, NULL)))
		return_0;

	return fs_add_lv(lv, name);
}

static int _dev_manager_lv_rmnodes(const struct logical_volume *lv)
{
	return fs_del_lv(lv);
}

static int _lv_has_mknode(const struct logical_volume *lv)
{
	return (lv_is_visible(lv) &&
		(!lv_is_thin_pool(lv) || lv_is_new_thin_pool(lv)));
}

int dev_manager_mknodes(const struct logical_volume *lv)
{
	struct dm_info dminfo;
	struct dm_task *dmt;
	char *name;
	int r = 0;

	if (!(name = dm_build_dm_name(lv->vg->cmd->mem, lv->vg->name, lv->name, NULL)))
		return_0;

	if (!(dmt = _setup_task_run(DM_DEVICE_MKNODES, &dminfo, name, NULL, 0, 0, 0, 0, 0, 0)))
		return_0;

	if (dminfo.exists) {
		/* read-only component LV is also made visible */
		if (_lv_has_mknode(lv) || (dminfo.read_only && lv_is_component(lv)))
			r = _dev_manager_lv_mknodes(lv);
		else
			r = 1;
	} else
		r = _dev_manager_lv_rmnodes(lv);

	dm_task_destroy(dmt);

	return r;
}

#ifdef UDEV_SYNC_SUPPORT
/*
 * Until the DM_UEVENT_GENERATED_FLAG was introduced in kernel patch
 * 856a6f1dbd8940e72755af145ebcd806408ecedd
 * some operations could not be performed by udev, requiring our fallback code.
 */
static int _dm_driver_has_stable_udev_support(void)
{
	char vsn[80];
	unsigned maj, min, patchlevel;

	return driver_version(vsn, sizeof(vsn)) &&
	       (sscanf(vsn, "%u.%u.%u", &maj, &min, &patchlevel) == 3) &&
	       (maj == 4 ? min >= 18 : maj > 4);
}

static int _check_udev_fallback(struct cmd_context *cmd)
{
	struct config_info *settings = &cmd->current_settings;

	if (settings->udev_fallback != -1)
		goto out;

	/*
	 * Use udev fallback automatically in case udev
	 * is disabled via DM_DISABLE_UDEV environment
	 * variable or udev rules are switched off.
	 */
	settings->udev_fallback = !settings->udev_rules ? 1 :
		find_config_tree_bool(cmd, activation_verify_udev_operations_CFG, NULL);

	/* Do not rely fully on udev if the udev support is known to be incomplete. */
	if (!settings->udev_fallback && !_dm_driver_has_stable_udev_support()) {
		log_very_verbose("Kernel driver has incomplete udev support so "
				 "LVM will check and perform some operations itself.");
		settings->udev_fallback = 1;
	}
out:
	return settings->udev_fallback;
}

#else /* UDEV_SYNC_SUPPORT */

static int _check_udev_fallback(struct cmd_context *cmd)
{
	/* We must use old node/symlink creation code if not compiled with udev support at all! */
	return cmd->current_settings.udev_fallback = 1;
}

#endif /* UDEV_SYNC_SUPPORT */
static uint16_t _get_udev_flags(struct dev_manager *dm, const struct logical_volume *lv,
				const char *layer, int noscan, int temporary,
				int visible_component)
{
	uint16_t udev_flags = 0;

	/*
	 * Also instruct libdevmapper to disable its udev
	 * fallback in accordance with the LVM2 settings.
	 */
	if (!_check_udev_fallback(dm->cmd))
		udev_flags |= DM_UDEV_DISABLE_LIBRARY_FALLBACK;

	/*
	 * Is this a top-level and visible device?
	 * If not, create just the /dev/mapper content.
	 */
	/* FIXME: add target's method for this */
	if (lv_is_new_thin_pool(lv) || visible_component)
		/* New thin-pool is a regular LV with a -tpool UUID suffix. */
		udev_flags |= DM_UDEV_DISABLE_DISK_RULES_FLAG |
			      DM_UDEV_DISABLE_OTHER_RULES_FLAG;
	else if (layer || !lv_is_visible(lv) || lv_is_thin_pool(lv) || lv_is_vdo_pool(lv))
		udev_flags |= DM_UDEV_DISABLE_SUBSYSTEM_RULES_FLAG |
			      DM_UDEV_DISABLE_DISK_RULES_FLAG |
			      DM_UDEV_DISABLE_OTHER_RULES_FLAG;
	/*
	 * There's no need for other udev rules to touch special LVs with
	 * reserved names, and we don't need to populate /dev/disk here either,
	 * even if they happen to be visible and top-level.
	 */
	else if (is_reserved_lvname(lv->name))
		udev_flags |= DM_UDEV_DISABLE_DISK_RULES_FLAG |
			      DM_UDEV_DISABLE_OTHER_RULES_FLAG;

	/*
	 * Snapshots and origins could have the same rule applied that will
	 * give symlinks exactly the same name (e.g. a name based on a
	 * filesystem UUID). We give preference to origins to make such
	 * naming deterministic (e.g. symlinks in /dev/disk/by-uuid).
	 */
	if (lv_is_cow(lv))
		udev_flags |= DM_UDEV_LOW_PRIORITY_FLAG;

	/*
	 * Finally, add flags to stop udev from creating the /dev/mapper and
	 * /dev/<vgname> content if the user's configuration requests it.
	 * This is basically an explicit fallback to the old node/symlink
	 * creation without udev.
	 */
	if (!dm->cmd->current_settings.udev_rules)
		udev_flags |= DM_UDEV_DISABLE_DM_RULES_FLAG |
			      DM_UDEV_DISABLE_SUBSYSTEM_RULES_FLAG;
	/* LVM subsystem specific flags. */
	if (noscan)
		udev_flags |= DM_SUBSYSTEM_UDEV_FLAG0;

	if (temporary)
		udev_flags |= DM_UDEV_DISABLE_DISK_RULES_FLAG |
			      DM_UDEV_DISABLE_OTHER_RULES_FLAG;

	return udev_flags;
}
static int _add_lv_to_dtree(struct dev_manager *dm, struct dm_tree *dtree,
			    const struct logical_volume *lv, int origin_only);

static int _add_new_lv_to_dtree(struct dev_manager *dm, struct dm_tree *dtree,
				const struct logical_volume *lv,
				struct lv_activate_opts *laopts,
				const char *layer);

/*
 * Check for device holders (ATM used only for removed pvmove targets)
 * and add them into dtree structures.
 * When 'laopts != NULL' add them as new nodes - which also corrects READ_AHEAD.
 * Note: correct tables are already explicitly PRELOADED.
 */
static int _check_holder(struct dev_manager *dm, struct dm_tree *dtree,
			 const struct logical_volume *lv,
			 struct lv_activate_opts *laopts,
			 uint32_t major, const char *d_name)
{
	const char *default_uuid_prefix = dm_uuid_prefix();
	const size_t default_uuid_prefix_len = strlen(default_uuid_prefix);
	const char *name;
	const char *uuid;
	struct dm_info info;
	struct dm_task *dmt;
	struct logical_volume *lv_det;
	union lvid id;
	int dev, r = 0;

	errno = 0;
	dev = strtoll(d_name + 3, NULL, 10);
	if (errno) {
		log_error("Failed to parse dm device minor number from %s.", d_name);
		return 0;
	}

	if (!(dmt = _setup_task_run(DM_DEVICE_INFO, &info, NULL, NULL, NULL,
				    major, dev, 0, 0, 0)))
		return_0;

	if (info.exists) {
		uuid = dm_task_get_uuid(dmt);
		name = dm_task_get_name(dmt);

		log_debug_activation("Checking holder of %s %s (" FMTu32 ":" FMTu32 ") %s.",
				     display_lvname(lv), uuid, info.major, info.minor,
				     name);

		/* Skip common uuid prefix */
		if (!strncmp(default_uuid_prefix, uuid, default_uuid_prefix_len))
			uuid += default_uuid_prefix_len;

		if (!memcmp(uuid, &lv->vg->id, ID_LEN) &&
		    !dm_tree_find_node_by_uuid(dtree, uuid)) {
			/* Trims any UUID suffix (i.e. -cow) */
			dm_strncpy((char *)&id, uuid, 2 * sizeof(struct id) + 1);

			/* If UUID is not yet in dtree, look for matching LV */
			if (!(lv_det = find_lv_in_vg_by_lvid(lv->vg, &id))) {
				log_error("Cannot find holder with device name %s in VG %s.",
					  name, lv->vg->name);
				goto out;
			}

			if (lv_is_cow(lv_det))
				lv_det = origin_from_cow(lv_det);

			log_debug_activation("Found holder %s of %s.",
					     display_lvname(lv_det),
					     display_lvname(lv));

			if (!laopts) {
				if (!_add_lv_to_dtree(dm, dtree, lv_det, 0))
					goto_out;
			} else if (!_add_new_lv_to_dtree(dm, dtree, lv_det, laopts, 0))
				goto_out;
		}
	}

	r = 1;
out:
	dm_task_destroy(dmt);

	return r;
}
/*
 * Add existing devices which hold the given LV device open.
 * This is used in the case when the metadata no longer contain the
 * information, i.e. PVMOVE is being finished and the final table is
 * going to be resumed.
 */
static int _add_holders_to_dtree(struct dev_manager *dm, struct dm_tree *dtree,
				 const struct logical_volume *lv,
				 struct lv_activate_opts *laopts,
				 const struct dm_info *info)
{
	const char *sysfs_dir = dm_sysfs_dir();
	char sysfs_path[PATH_MAX];
	struct dirent *dirent;
	DIR *d;
	int r = 0;

	/* Sysfs path of holders */
	if (dm_snprintf(sysfs_path, sizeof(sysfs_path), "%sblock/dm-" FMTu32
			"/holders", sysfs_dir, info->minor) < 0) {
		log_error("sysfs_path dm_snprintf failed.");
		return 0;
	}

	if (!(d = opendir(sysfs_path))) {
		log_sys_error("opendir", sysfs_path);
		return 0;
	}

	while ((dirent = readdir(d)))
		/* Expects minor is added to 'dm-' prefix */
		if (!strncmp(dirent->d_name, "dm-", 3) &&
		    !_check_holder(dm, dtree, lv, laopts, info->major, dirent->d_name))
			goto_out;

	r = 1;
out:
	if (closedir(d))
		log_sys_debug("closedir", "holders");

	return r;
}
static int _add_dev_to_dtree(struct dev_manager *dm, struct dm_tree *dtree,
			     const struct logical_volume *lv, const char *layer)
{
	char *dlid, *name;
	struct dm_info info, info2;
	const struct dm_active_device *dm_dev;

	if (!(name = dm_build_dm_name(dm->mem, lv->vg->name, lv->name, layer)))
		return_0;

	if (!(dlid = build_dm_uuid(dm->track_pending_delete ? dm->cmd->pending_delete_mem : dm->mem, lv, layer)))
		return_0;

	if (dm_devs_cache_use()) {
		if (!(dm_dev = dm_devs_cache_get_by_uuid(dm->cmd, dlid))) {
			log_debug("Cached as not present %s.", name);
			return 1;
		}
		info = (struct dm_info) {
			.exists = 1,
			.major = MAJOR(dm_dev->devno),
			.minor = MINOR(dm_dev->devno),
		};
		log_debug("Cached as present %s %s (%d:%d).",
			  name, dlid, info.major, info.minor);
	} else if (!_info(dm->cmd, name, dlid, 0, 0, 0, &info, NULL, NULL))
		return_0;

	/*
	 * For top level volumes verify that the existing device matches the
	 * requested major/minor and that the major/minor pair is available for use.
	 */
	if (!layer && lv->major != -1 && lv->minor != -1) {
		/*
		 * FIXME compare info.major with lv->major if multiple major support
		 */
		if (info.exists && ((int) info.minor != lv->minor)) {
			log_error("Volume %s (%" PRIu32 ":%" PRIu32 ") "
				  "differs from already active device "
				  "(%" PRIu32 ":%" PRIu32 ").",
				  display_lvname(lv), lv->major, lv->minor,
				  info.major, info.minor);
			return 0;
		}
		if (!info.exists && _info_by_dev(lv->major, lv->minor, &info2) &&
		    info2.exists) {
			log_error("The requested major:minor pair "
				  "(%" PRIu32 ":%" PRIu32 ") is already used.",
				  lv->major, lv->minor);
			return 0;
		}
	}

	if (info.exists && !dm_tree_add_dev_with_udev_flags(dtree, info.major, info.minor,
							    _get_udev_flags(dm, lv, layer,
									    0, 0, 0))) {
		log_error("Failed to add device (%" PRIu32 ":%" PRIu32 ") to dtree.",
			  info.major, info.minor);
		return 0;
	}

	if (info.exists && dm->track_pending_delete) {
		log_debug_activation("Tracking pending delete for %s%s%s (%s).",
				     display_lvname(lv), layer ? "-" : "", layer ? : "", dlid);
		if (!str_list_add(dm->cmd->pending_delete_mem, &dm->cmd->pending_delete, dlid))
			return_0;
	}

	/*
	 * Find holders of an existing active LV whose name starts with 'pvmove',
	 * but which is no longer a PVMOVE LV and also is not a PVMOVE _mimage.
	 */
	if (info.exists && !lv_is_pvmove(lv) &&
	    !strchr(lv->name, '_') && !strncmp(lv->name, "pvmove", 6))
		if (!_add_holders_to_dtree(dm, dtree, lv, NULL, &info))
			return_0;

	return 1;
}
struct pool_cb_data {
	struct dev_manager *dm;
	const struct logical_volume *pool_lv;

	int skip_zero;	/* to skip zeroed device header (check first 64B) */
	int exec;	/* which binary to call */
	int opts;

	struct {
		unsigned maj;
		unsigned min;
		unsigned patch;
	} version;

	const char *global;
};

/*
 * Simple version check calling 'tool -V'.
 *
 * Returns 1 if the tool's version is equal to or newer than the given one,
 * otherwise it returns 0.
 */
static int _check_tool_version(struct cmd_context *cmd, const char *tool,
			       unsigned maj, unsigned min, unsigned patch)
{
	const char *argv[] = { tool, "-V", NULL };
	struct pipe_data pdata;
	FILE *f;
	char buf[128] = { 0 };
	char *nl;
	unsigned v_maj, v_min, v_patch;
	int ret = 0;

	if (!(f = pipe_open(cmd, argv, 0, &pdata))) {
		log_warn("WARNING: Cannot read output from %s.", argv[0]);
	} else {
		if (fgets(buf, sizeof(buf) - 1, f) &&
		    (sscanf(buf, "%u.%u.%u", &v_maj, &v_min, &v_patch) == 3)) {
			if ((v_maj > maj) ||
			    ((v_maj == maj) &&
			     ((v_min > min) ||
			      (v_min == min && v_patch >= patch))))
				ret = 1;

			if ((nl = strchr(buf, '\n')))
				nl[0] = 0; /* cut newline away */

			log_verbose("Found version of %s %s is %s than requested %u.%u.%u.",
				    argv[0], buf, ret ? "better" : "older", maj, min, patch);
		} else
			log_warn("WARNING: Cannot parse output '%s' from %s.", buf, argv[0]);

		(void) pipe_close(&pdata);
	}

	return ret;
}
static int _pool_callback(struct dm_tree_node *node,
			  dm_node_callback_t type, void *cb_data)
{
	int ret, status = 0, fd;
	const struct pool_cb_data *data = cb_data;
	const struct logical_volume *pool_lv = data->pool_lv;
	const struct logical_volume *mlv = first_seg(pool_lv)->metadata_lv;
	struct cmd_context *cmd = pool_lv->vg->cmd;
	long buf[64 / sizeof(long)]; /* buffer for short disk header (64B) */
	int args = 0;
	char *mpath;
	const char *argv[DEFAULT_MAX_EXEC_ARGS + 7] = { /* Max supported 15 args */
		find_config_tree_str_allow_empty(cmd, data->exec, NULL)
	};

	if (!argv[0] || !*argv[0]) /* *_check tool is unconfigured/disabled with "" setting */
		return 1;

	if (lv_is_cache_vol(pool_lv)) {
		if (!(mpath = lv_dmpath_suffix_dup(data->dm->mem, pool_lv, "-cmeta"))) {
			log_error("Failed to build device path for checking cachevol metadata %s.",
				  display_lvname(pool_lv));
			return 0;
		}
	} else {
		if (!(mpath = lv_dmpath_dup(data->dm->mem, mlv))) {
			log_error("Failed to build device path for checking pool metadata %s.",
				  display_lvname(mlv));
			return 0;
		}
	}

	dm_devs_cache_destroy();

	log_debug("Running check command on %s.", mpath);

	if (data->skip_zero) {
		if ((fd = open(mpath, O_RDONLY)) < 0) {
			log_sys_error("open", mpath);
			return 0;
		}
		/* let's assume there is no problem to read 64 bytes */
		if (read(fd, buf, sizeof(buf)) < (int) sizeof(buf)) {
			log_sys_error("read", mpath);
			if (close(fd))
				log_sys_error("close", mpath);
			return 0;
		}
		for (ret = 0; ret < (int) DM_ARRAY_SIZE(buf); ++ret)
			if (buf[ret])
				break;

		if (close(fd))
			log_sys_error("close", mpath);

		if (ret == (int) DM_ARRAY_SIZE(buf)) {
			log_debug_activation("Metadata checking skipped, detected empty disk header on %s.",
					     mpath);
			return 1;
		}
	}

	if (!prepare_exec_args(cmd, argv, &args, data->opts))
		return_0;

	argv[++args] = mpath;

	if (!(ret = exec_cmd(cmd, (const char * const *) argv,
			     &status, 0))) {
		if (status == ENOENT) {
			log_warn("WARNING: Check is skipped, please install recommended missing binary %s!",
				 argv[0]);
			return 1;
		}

		if ((data->version.maj || data->version.min || data->version.patch) &&
		    !_check_tool_version(cmd, argv[0],
					 data->version.maj, data->version.min, data->version.patch)) {
			log_warn("WARNING: Check is skipped, please upgrade installed version of %s!",
				 argv[0]);
			return 1;
		}

		switch (type) {
		case DM_NODE_CALLBACK_PRELOADED:
			log_err_once("Check of pool %s failed (status:%d). "
				     "Manual repair required!",
				     display_lvname(pool_lv), status);
			break;
		default:
			log_warn("WARNING: Integrity check of metadata for pool "
				 "%s failed.", display_lvname(pool_lv));
		}

		/*
		 * FIXME: What should we do here??
		 *
		 * Maybe mark the node, so it's not activating
		 * as pool but as error/linear and let the
		 * dm tree resolve the issue.
		 */
	}

	return ret;
}
static int _pool_register_callback(struct dev_manager *dm,
				   struct dm_tree_node *node,
				   const struct logical_volume *lv)
{
	struct pool_cb_data *data;

	/* Do not skip metadata testing even for unused thin pools */
#if 0
	/* Skip metadata testing for unused thin pool. */
	if (lv_is_thin_pool(lv) &&
	    (!first_seg(lv)->transaction_id ||
	     ((first_seg(lv)->transaction_id == 1) &&
	      pool_has_message(first_seg(lv), NULL, 0))))
		return 1;
#endif

	/* Skip validation of metadata for lvremove and vgremove */
	if (!dm->activation &&
	    (!strcmp(dm->cmd->name, "lvremove") ||
	     !strcmp(dm->cmd->name, "vgremove"))) {
		log_debug("Skipping %s callback registration for command %s.",
			  display_lvname(lv), dm->cmd->name);
		return 1;
	}

	if (!(data = dm_pool_zalloc(dm->mem, sizeof(*data)))) {
		log_error("Failed to allocate data for callback.");
		return 0;
	}

	data->dm = dm;

	if (lv_is_thin_pool(lv)) {
		data->pool_lv = lv;
		data->skip_zero = 1;
		data->exec = global_thin_check_executable_CFG;
		data->opts = global_thin_check_options_CFG;
		data->global = "thin";
	} else if (lv_is_cache(lv)) { /* cache pool */
		data->pool_lv = first_seg(lv)->pool_lv;
		data->skip_zero = 1; /* cheap read-error detection */
		data->exec = global_cache_check_executable_CFG;
		data->opts = global_cache_check_options_CFG;
		data->global = "cache";
		if (first_seg(first_seg(lv)->pool_lv)->cache_metadata_format > 1) {
			data->version.maj = 0;
			data->version.min = 7;
		}
	} else {
		log_error(INTERNAL_ERROR "Registering unsupported pool callback.");
		return 0;
	}

	dm_tree_node_set_callback(node, _pool_callback, data);

	return 1;
}
static struct id _get_id_for_meta_or_data(const struct lv_segment *lvseg, int meta_or_data)
{
	/* When an ID is provided in the form of metadata_id or data_id, use it; otherwise use the CVOL ID */
	if (meta_or_data && lvseg->metadata_id)
		return *lvseg->metadata_id;

	if (!meta_or_data && lvseg->data_id)
		return *lvseg->data_id;

	return lvseg->pool_lv->lvid.id[1];
}

/* Add special devices _cmeta & _cdata on top of CacheVol to dm tree */
static int _add_cvol_subdev_to_dtree(struct dev_manager *dm, struct dm_tree *dtree,
				     const struct logical_volume *lv, int meta_or_data)
{
	const char *layer = meta_or_data ? "cmeta" : "cdata";
	struct dm_pool *mem = dm->track_pending_delete ? dm->cmd->pending_delete_mem : dm->mem;
	struct lv_segment *lvseg = first_seg(lv);
	const struct logical_volume *pool_lv = lvseg->pool_lv;
	struct dm_info info;
	char *name, *dlid;
	union lvid lvid = { { lv->vg->id, _get_id_for_meta_or_data(lvseg, meta_or_data) } };

	if (!(dlid = dm_build_dm_uuid(mem, UUID_PREFIX, (const char *) &lvid.s, layer)))
		return_0;

	/* Name is actually not really needed here, but aids debugging... */
	if (!(name = dm_build_dm_name(dm->mem, lv->vg->name, pool_lv->name, layer)))
		return_0;

	if (!_info(dm->cmd, name, dlid, 0, 0, 0, &info, NULL, NULL))
		return_0;

	if (info.exists) {
		if (!dm_tree_add_dev_with_udev_flags(dtree, info.major, info.minor,
						     _get_udev_flags(dm, lv, layer, 0, 0, 0))) {
			log_error("Failed to add device (%" PRIu32 ":%" PRIu32 ") to dtree.", info.major, info.minor);
			return 0;
		}

		if (dm->track_pending_delete) {
			log_debug_activation("Tracking pending delete for %s-%s (%s).",
					     display_lvname(pool_lv), layer, dlid);
			if (!str_list_add(mem, &dm->cmd->pending_delete, dlid))
				return_0;
		}
	}

	return 1;
}
/* Declaration to resolve suspend tree and message passing for thin-pool */
static int _add_target_to_dtree(struct dev_manager *dm,
				struct dm_tree_node *dnode,
				struct lv_segment *seg,
				struct lv_activate_opts *laopts);
/*
 * Add LV and any known dependencies
 */
static int _add_lv_to_dtree(struct dev_manager *dm, struct dm_tree *dtree,
			    const struct logical_volume *lv, int origin_only)
{
	uint32_t s;
	struct seg_list *sl;
	struct dm_list *snh;
	struct lv_segment *seg;
	struct dm_tree_node *node;
	const struct logical_volume *plv;

	if (lv_is_pvmove(lv) && (dm->track_pvmove_deps == 2))
		return 1; /* Avoid rechecking of already seen pvmove LV */

	if (lv_is_cache_pool(lv)) {
		if (!dm_list_empty(&lv->segs_using_this_lv)) {
			if (!_add_lv_to_dtree(dm, dtree, seg_lv(first_seg(lv), 0), 0))
				return_0;
			if (!_add_lv_to_dtree(dm, dtree, first_seg(lv)->metadata_lv, 0))
				return_0;
			/* Cache pool does not have a real device node */
			return 1;
		}
		/* Unused cache pool is activated as metadata */
	}

	if (!origin_only && !_add_dev_to_dtree(dm, dtree, lv, NULL))
		return_0;

	/* FIXME Can we avoid doing this every time? */
	/* Reused also for lv_is_external_origin(lv) */
	if (!_add_dev_to_dtree(dm, dtree, lv, "real"))
		return_0;

	if (!origin_only && !_add_dev_to_dtree(dm, dtree, lv, "cow"))
		return_0;

	if (origin_only && lv_is_thin_volume(lv)) {
		if (!_add_dev_to_dtree(dm, dtree, lv, lv_layer(lv)))
			return_0;
#if 0
		/* ? Use origin_only to avoid 'deep' thin pool suspend ? */
		/* FIXME Implement dm_tree_node_skip_childrens optimisation */
		if (!(uuid = build_dm_uuid(dm->mem, lv, lv_layer(lv))))
			return_0;
		if ((node = dm_tree_find_node_by_uuid(dtree, uuid)))
			dm_tree_node_skip_childrens(node, 1);
#endif
	}

	if (dm->activation && lv_is_external_origin(lv)) {
		/* Find possible users of external origin lv */
		dm_list_iterate_items(sl, &lv->segs_using_this_lv)
			if (!_add_dev_to_dtree(dm, dtree, sl->seg->lv, lv_layer(sl->seg->lv)))
				return_0;
	}

	if (lv_is_thin_pool(lv)) {
		/*
		 * For both origin_only and !origin_only
		 * skips test for -tpool-real and tpool-cow
		 */
		if (!_add_dev_to_dtree(dm, dtree, lv, lv_layer(lv)))
			return_0;
		/*
		 * TODO: change API and move this code
		 * Could be easier to handle this in _add_dev_to_dtree()
		 * and base this according to info.exists?
		 */
		if (!dm->activation) {
			if ((node = _cached_dm_tree_node(dm->mem, dtree, lv, lv_layer(lv)))) {
				if (origin_only) {
					struct lv_activate_opts laopts = {
						.origin_only = 1,
						.send_messages = 1 /* Node with messages */
					};
					/*
					 * Add some messages if right node exist in the table only
					 * when building SUSPEND tree for origin-only thin-pool.
					 *
					 * TODO: Fix call of '_add_target_to_dtree()' to add message
					 * to thin-pool node as we already know the pool node exists
					 * in the table. Any better/cleaner API way?
					 *
					 * Probably some 'new' target method to add messages for any node?
					 */
					if (dm->suspend &&
					    !dm_list_empty(&(first_seg(lv)->thin_messages)) &&
					    !_add_target_to_dtree(dm, node, first_seg(lv), &laopts))
						return_0;
				} else {
					/* Setup callback for non-activation partial tree */
					/* Activation gets own callback when needed */
					/* TODO: extend _cached_dm_info() to return dnode */
					if (!_pool_register_callback(dm, node, lv))
						return_0;
				}
			}
		}
	}
	if (lv_is_vdo_pool(lv)) {
		/*
		 * For both origin_only and !origin_only
		 * skips test for -vpool-real and vpool-cow
		 */
		if (!_add_dev_to_dtree(dm, dtree, lv, lv_layer(lv)))
			return_0;
	}
	if (lv_is_cache(lv)) {
		if (!origin_only && !dm->activation && !dm->track_pending_delete) {
			/* Setup callback for non-activation partial tree */
			/* Activation gets own callback when needed */
			if ((node = _cached_dm_tree_node(dm->mem, dtree, lv, lv_layer(lv))) &&
			    !_pool_register_callback(dm, node, lv))
				return_0;
		}
	}
	if (lv_is_cache_vol(lv))
		/* Cachevol with cache LV spans some extra layers -cdata, -cmeta */
		dm_list_iterate_items(sl, &lv->segs_using_this_lv)
			if (lv_is_cache(sl->seg->lv) &&
			    (!_add_cvol_subdev_to_dtree(dm, dtree, sl->seg->lv, 0) ||
			     !_add_cvol_subdev_to_dtree(dm, dtree, sl->seg->lv, 1) ||
			     !_add_dev_to_dtree(dm, dtree, lv, lv_layer(lv))))
				return_0;
	/* Add any snapshots of this LV */
	if (!origin_only && lv_is_origin(lv))
		dm_list_iterate(snh, &lv->snapshot_segs)
			if (!_add_lv_to_dtree(dm, dtree, dm_list_struct_base(snh, struct lv_segment, origin_list)->cow, 0))
				return_0;

	if (dm->activation && !origin_only && lv_is_merging_origin(lv) &&
	    !_add_lv_to_dtree(dm, dtree, find_snapshot(lv)->lv, 1))
		return_0;
	/* Add any LVs referencing a PVMOVE LV unless told not to. */
	if ((dm->track_pvmove_deps == 1) && lv_is_pvmove(lv)) {
		dm->track_pvmove_deps = 2; /* Mark as already seen */

		dm_list_iterate_items(sl, &lv->segs_using_this_lv) {
			/* If LV is snapshot COW - whole snapshot needs reload */
			plv = lv_is_cow(sl->seg->lv) ? origin_from_cow(sl->seg->lv) : sl->seg->lv;
			if (!_add_lv_to_dtree(dm, dtree, plv, 0))
				return_0;
		}

		dm->track_pvmove_deps = 1;
	}
	if (!dm->track_pending_delete)
		dm_list_iterate_items(sl, &lv->segs_using_this_lv) {
			if (lv_is_pending_delete(sl->seg->lv)) {
				/* LV is referenced by 'cache pending delete' LV */
				dm->track_pending_delete = 1;
				if (!_cached_dm_tree_node(dm->mem, dtree, sl->seg->lv, lv_layer(sl->seg->lv)) &&
				    !_add_lv_to_dtree(dm, dtree, sl->seg->lv, 0))
					return_0;
				dm->track_pending_delete = 0;
			}
		}
	/* Add any LVs used by segments in this LV */
	dm_list_iterate_items(seg, &lv->segments) {
		if (seg->external_lv &&
		    !_add_lv_to_dtree(dm, dtree, seg->external_lv,
				      /* For origin LV check for complete device tree */
				      lv_is_origin(seg->external_lv) ? 0 : 1)) /* stack */
			return_0;

		if (seg->log_lv &&
		    !_add_lv_to_dtree(dm, dtree, seg->log_lv, 0))
			return_0;

		if (seg->metadata_lv &&
		    !_add_lv_to_dtree(dm, dtree, seg->metadata_lv, 0))
			return_0;

		if (seg->writecache && seg_is_writecache(seg)) {
			if (!_add_lv_to_dtree(dm, dtree, seg->writecache, dm->activation ? origin_only : 1))
				return_0;
		}

		if (seg->integrity_meta_dev && seg_is_integrity(seg)) {
			if (!_add_lv_to_dtree(dm, dtree, seg->integrity_meta_dev, dm->activation ? origin_only : 1))
				return_0;
		}

		if (seg->pool_lv &&
		    /* When activating and not origin_only detect linear 'overlay' over pool */
		    !_add_lv_to_dtree(dm, dtree, seg->pool_lv, dm->activation ? origin_only : 1))
			return_0;

		for (s = 0; s < seg->area_count; s++) {
			if (seg_type(seg, s) == AREA_LV && seg_lv(seg, s) &&
			    /* origin only for cache without pending delete */
			    (!dm->track_pending_delete || !lv_is_cache(lv)) &&
			    !_add_lv_to_dtree(dm, dtree, seg_lv(seg, s),
					      lv_is_vdo_pool(seg_lv(seg, s)) ? 1 : 0))
				return_0;

			if (seg_is_raid_with_meta(seg) && seg->meta_areas && seg_metalv(seg, s) &&
			    !_add_lv_to_dtree(dm, dtree, seg_metalv(seg, s), 0))
				return_0;
		}

		/* When activating, detect merging LV presence */
		if (dm->activation && seg->merge_lv &&
		    !_add_lv_to_dtree(dm, dtree, seg->merge_lv, 1))
			return_0;
	}
	return 1;
}
static struct dm_tree *_create_partial_dtree(struct dev_manager *dm, const struct logical_volume *lv, int origin_only)
{
	struct dm_tree *dtree;

	if (!(dtree = dm_tree_create())) {
		log_debug_activation("Partial dtree creation failed for %s.",
				     display_lvname(lv));
		return NULL;
	}

	dm_tree_set_optional_uuid_suffixes(dtree, (const char **) _uuid_suffix_list);
	if (!_add_lv_to_dtree(dm, dtree, lv, (lv_is_origin(lv) || lv_is_thin_volume(lv) || lv_is_thin_pool(lv)) ? origin_only : 0))
		goto_bad;

	return dtree;

bad:
	dm_tree_free(dtree);

	return NULL;
}
static char *_add_error_or_zero_device(struct dev_manager *dm, struct dm_tree *dtree,
				       struct lv_segment *seg, int s, int use_zero)
{
	char *dlid, *name;
	char errid[32];
	struct dm_tree_node *node;
	struct lv_segment *seg_i;
	struct dm_info info;
	int segno = -1, i = 0;
	uint64_t size = (uint64_t) _seg_len(seg) * seg->lv->vg->extent_size;
	dm_list_iterate_items(seg_i, &seg->lv->segments) {
		if (seg == seg_i) {
			segno = i;
			break;
		}
		++i;
	}

	if (segno < 0) {
		log_error(INTERNAL_ERROR "_add_error_or_zero_device called with bad segment.");
		return NULL;
	}

	sprintf(errid, "missing_%d_%d", segno, s);

	if (!(dlid = build_dm_uuid(dm->mem, seg->lv, errid)))
		return_NULL;

	if (!(name = dm_build_dm_name(dm->mem, seg->lv->vg->name,
				      seg->lv->name, errid)))
		return_NULL;

	if (!_info(dm->cmd, name, dlid, 0, 0, 0, &info, NULL, NULL))
		return_NULL;

	if (!info.exists) {
		/* Create new node */
		if (!(node = dm_tree_add_new_dev(dtree, name, dlid, 0, 0, 0, 0, 0)))
			return_NULL;

		if (use_zero) {
			if (!dm_tree_node_add_zero_target(node, size))
				return_NULL;
		} else
			if (!dm_tree_node_add_error_target(node, size))
				return_NULL;
	} else {
		/* Already exists */
		if (!dm_tree_add_dev(dtree, info.major, info.minor)) {
			log_error("Failed to add device (%" PRIu32 ":%" PRIu32 ") to dtree.",
				  info.major, info.minor);
			return NULL;
		}
	}

	return dlid;
}
static int _add_error_area(struct dev_manager *dm, struct dm_tree_node *node,
			   struct lv_segment *seg, int s)
{
	char *dlid;
	uint64_t extent_size = seg->lv->vg->extent_size;
	int use_zero = !strcmp(dm->cmd->stripe_filler, TARGET_NAME_ZERO) ? 1 : 0;

	if (!strcmp(dm->cmd->stripe_filler, TARGET_NAME_ERROR) || use_zero) {
		/*
		 * FIXME, the tree pointer is first field of dm_tree_node, but
		 * we don't have the struct definition available.
		 */
		struct dm_tree **tree = (struct dm_tree **) node;

		if (!(dlid = _add_error_or_zero_device(dm, *tree, seg, s, use_zero)))
			return_0;

		if (!dm_tree_node_add_target_area(node, NULL, dlid, extent_size * seg_le(seg, s)))
			return_0;
	} else
		if (!dm_tree_node_add_target_area(node, dm->cmd->stripe_filler, NULL, UINT64_C(0)))
			return_0;

	return 1;
}
static int _bad_pv_area(struct lv_segment *seg, uint32_t s)
{
	struct stat info;
	const char *name;
	struct device *dev;

	if (!seg_pvseg(seg, s))
		return 1;
	if (!seg_pv(seg, s))
		return 1;
	if (!(dev = seg_dev(seg, s)))
		return 1;
	if (dm_list_empty(&dev->aliases))
		return 1;
	/* FIXME Avoid repeating identical stat in dm_tree_node_add_target_area */
	name = dev_name(dev);
	if (stat(name, &info) < 0)
		return 1;
	if (!S_ISBLK(info.st_mode))
		return 1;

	return 0;
}
int add_areas_line(struct dev_manager *dm, struct lv_segment *seg,
		   struct dm_tree_node *node, uint32_t start_area,
		   uint32_t areas)
{
	struct cmd_context *cmd = seg->lv->vg->cmd;
	uint64_t extent_size = seg->lv->vg->extent_size;
	uint32_t s;
	char *dlid;
	const char *name;
	unsigned num_error_areas = 0;
	unsigned num_existing_areas = 0;
	for (s = start_area; s < areas; s++) {
		if (((seg_type(seg, s) == AREA_PV) && _bad_pv_area(seg, s)) ||
		    ((seg_type(seg, s) == AREA_LV) && !seg_lv(seg, s))) {
			if (!cmd->partial_activation) {
				if (!cmd->degraded_activation ||
				    (!lv_is_raid_type(seg->lv) &&
				     !lv_is_integrity(seg->lv) &&
				     !lv_is_integrity_metadata(seg->lv) &&
				     !lv_is_integrity_origin(seg->lv))) {
					log_error("Aborting. LV %s is incomplete and --activationmode partial was not specified.",
						  display_lvname(seg->lv));
					return 0;
				}
			}

			if (!_add_error_area(dm, node, seg, s))
				return_0;

			num_error_areas++;
		} else if (seg_type(seg, s) == AREA_PV) {
			struct device *dev = seg_dev(seg, s);
			name = dm_list_empty(&dev->aliases) ? NULL : dev_name(dev);

			if (!dm_tree_node_add_target_area(node, name, NULL,
							  (seg_pv(seg, s)->pe_start + (extent_size * seg_pe(seg, s)))))
				return_0;

			num_existing_areas++;
		} else if (seg_is_raid(seg)) {
			/*
			 * RAID can handle unassigned areas. It simply puts
			 * '- -' in for the metadata/data device pair. This
			 * is a valid way to indicate to the RAID target that
			 * the device is missing.
			 *
			 * If an image is marked as VISIBLE_LV and !LVM_WRITE,
			 * it means the device has temporarily been extracted
			 * from the array. It may come back at a future date,
			 * so the bitmap must track differences. Again, '- -'
			 * is used in the CTR table.
			 */
			if ((seg_type(seg, s) == AREA_UNASSIGNED) ||
			    (lv_is_visible(seg_lv(seg, s)) &&
			     !(seg_lv(seg, s)->status & LVM_WRITE))) {
				/* One each for metadata area and data area */
				if (!dm_tree_node_add_null_area(node, 0) ||
				    !dm_tree_node_add_null_area(node, 0))
					return_0;

				continue;
			}

			if (seg->meta_areas && seg_metalv(seg, s)) {
				if (!(dlid = build_dm_uuid(dm->mem, seg_metalv(seg, s), NULL)))
					return_0;
				if (!dm_tree_node_add_target_area(node, NULL, dlid, extent_size * seg_metale(seg, s)))
					return_0;
			} else if (!dm_tree_node_add_null_area(node, 0))
				return_0;

			if (!(dlid = build_dm_uuid(dm->mem, seg_lv(seg, s), NULL)))
				return_0;
			if (!dm_tree_node_add_target_area(node, NULL, dlid, extent_size * seg_le(seg, s)))
				return_0;
		} else if (seg_type(seg, s) == AREA_LV) {
			if (!(dlid = build_dm_uuid(dm->mem, seg_lv(seg, s), NULL)))
				return_0;
			if (!dm_tree_node_add_target_area(node, NULL, dlid, extent_size * seg_le(seg, s)))
				return_0;
		} else {
			log_error(INTERNAL_ERROR "Unassigned area found in LV %s.",
				  display_lvname(seg->lv));
			return 0;
		}
	}
	if (num_error_areas) {
		/* Thins currently do not support partial activation */
		if (lv_is_thin_type(seg->lv)) {
			log_error("Cannot activate %s: pool incomplete.",
				  display_lvname(seg->lv));
			return 0;
		}
	}

	return 1;
}
static int _add_layer_target_to_dtree(struct dev_manager *dm,
				      struct dm_tree_node *dnode,
				      const struct logical_volume *lv)
{
	const char *layer_dlid;

	if (!(layer_dlid = build_dm_uuid(dm->mem, lv, lv_layer(lv))))
		return_0;

	/* Add linear mapping over layered LV */
	/* From VDO layer expose ONLY vdo pool header, we would need to use virtual size otherwise */
	if (!add_linear_area_to_dtree(dnode, lv_is_vdo_pool(lv) ? 8 : lv->size,
				      lv->vg->extent_size,
				      lv->vg->cmd->use_linear_target,
				      lv->vg->name, lv->name) ||
	    !dm_tree_node_add_target_area(dnode, NULL, layer_dlid, 0))
		return_0;

	return 1;
}
static int _add_origin_target_to_dtree(struct dev_manager *dm,
				       struct dm_tree_node *dnode,
				       const struct logical_volume *lv)
{
	const char *real_dlid;

	if (!(real_dlid = build_dm_uuid(dm->mem, lv, "real")))
		return_0;

	if (!dm_tree_node_add_snapshot_origin_target(dnode, lv->size, real_dlid))
		return_0;

	return 1;
}
static int _add_snapshot_merge_target_to_dtree(struct dev_manager *dm,
					       struct dm_tree_node *dnode,
					       const struct logical_volume *lv)
{
	const char *origin_dlid, *cow_dlid, *merge_dlid;
	struct lv_segment *merging_snap_seg = find_snapshot(lv);

	if (!lv_is_merging_origin(lv)) {
		log_error(INTERNAL_ERROR "LV %s is not merging snapshot.",
			  display_lvname(lv));
		return 0;
	}

	if (!(origin_dlid = build_dm_uuid(dm->mem, lv, "real")))
		return_0;

	if (!(cow_dlid = build_dm_uuid(dm->mem, merging_snap_seg->cow, "cow")))
		return_0;

	if (!(merge_dlid = build_dm_uuid(dm->mem, merging_snap_seg->cow, NULL)))
		return_0;

	if (!dm_tree_node_add_snapshot_merge_target(dnode, lv->size, origin_dlid,
						    cow_dlid, merge_dlid,
						    merging_snap_seg->chunk_size))
		return_0;

	return 1;
}
static int _add_snapshot_target_to_dtree(struct dev_manager *dm,
					 struct dm_tree_node *dnode,
					 const struct logical_volume *lv,
					 struct lv_activate_opts *laopts)
{
	const char *origin_dlid;
	const char *cow_dlid;
	struct lv_segment *snap_seg;
	uint64_t size;

	if (!(snap_seg = find_snapshot(lv))) {
		log_error("Couldn't find snapshot for '%s'.",
			  display_lvname(lv));
		return 0;
	}

	if (!(origin_dlid = build_dm_uuid(dm->mem, snap_seg->origin, "real")))
		return_0;

	if (!(cow_dlid = build_dm_uuid(dm->mem, snap_seg->cow, "cow")))
		return_0;

	size = (uint64_t) snap_seg->len * snap_seg->origin->vg->extent_size;

	if (!laopts->no_merging && lv_is_merging_cow(lv)) {
		/* cow is to be merged so load the error target */
		if (!dm_tree_node_add_error_target(dnode, size))
			return_0;
	} else if (!dm_tree_node_add_snapshot_target(dnode, size, origin_dlid,
						     cow_dlid, 1, snap_seg->chunk_size))
		return_0;

	return 1;
}
static int _add_target_to_dtree(struct dev_manager *dm,
				struct dm_tree_node *dnode,
				struct lv_segment *seg,
				struct lv_activate_opts *laopts)
{
	uint64_t extent_size = seg->lv->vg->extent_size;

	if (!seg->segtype->ops->add_target_line) {
		log_error(INTERNAL_ERROR "_emit_target cannot handle "
			  "segment type %s.", lvseg_name(seg));
		return 0;
	}

	return seg->segtype->ops->add_target_line(dm, dm->mem, dm->cmd,
						  &dm->target_state, seg,
						  laopts, dnode,
						  extent_size * _seg_len(seg),
						  &dm->pvmove_mirror_count);
}
static int _add_new_external_lv_to_dtree(struct dev_manager *dm,
					 struct dm_tree *dtree,
					 struct logical_volume *external_lv,
					 struct lv_activate_opts *laopts)
{
	struct seg_list *sl;
	struct dm_tree_node *dnode;

	/* We've already processed this node if it already has a context ptr */
	if ((dnode = _cached_dm_tree_node(dm->mem, dtree, external_lv, lv_layer(external_lv))) &&
	    dm_tree_node_get_context(dnode)) {
		/* Skip repeated invocation of external lv processing */
		log_debug_activation("Skipping users for already added external origin LV %s.",
				     display_lvname(external_lv));
		return 1;
	}

	log_debug_activation("Adding external origin LV %s and all active users.",
			     display_lvname(external_lv));

	/* If there is an active origin LV, add the whole origin device, otherwise only the -layer */
	if (!_add_new_lv_to_dtree(dm, dtree, external_lv, laopts,
				  (lv_is_origin(external_lv) &&
				   _cached_dm_info(dm->mem, dtree, external_lv, NULL))
				  ? NULL : lv_layer(external_lv)))
		return_0;

	/*
	 * Add all ACTIVE LVs using this external origin LV. This is
	 * needed because of conversion of thin which could have been
	 * also an old-snapshot to external origin.
	 */
	dm_list_iterate_items(sl, &external_lv->segs_using_this_lv)
		/* Add only active layered devices (also avoids loop) */
		if (_cached_dm_info(dm->mem, dtree, sl->seg->lv,
				    lv_layer(sl->seg->lv)) &&
		    !_add_new_lv_to_dtree(dm, dtree, sl->seg->lv,
					  laopts, lv_layer(sl->seg->lv)))
			return_0;

	log_debug_activation("Finished adding external origin LV %s and all active users.",
			     display_lvname(external_lv));

	return 1;
}
static int _add_new_cvol_subdev_to_dtree(struct dev_manager *dm,
					 struct dm_tree *dtree,
					 const struct logical_volume *lv,
					 struct lv_activate_opts *laopts,
					 struct lv_layer *lvlayer,
					 int meta_or_data)
{
	const char *layer = meta_or_data ? "cmeta" : "cdata";
	const struct lv_segment *lvseg = first_seg(lv);
	uint64_t size = meta_or_data ? lvseg->metadata_len : lvseg->data_len;
	const struct logical_volume *pool_lv = lvseg->pool_lv;
	struct dm_tree_node *dnode;
	char *dlid, *dlid_pool, *name;
	union lvid lvid = { { lv->vg->id, _get_id_for_meta_or_data(lvseg, meta_or_data) } };

	if (!(dlid = dm_build_dm_uuid(dm->mem, UUID_PREFIX, (const char *) &lvid.s, layer)))
		return_0;

	if (!(name = dm_build_dm_name(dm->mem, lv->vg->name, pool_lv->name, layer)))
		return_0;

	if (!(dnode = dm_tree_add_new_dev_with_udev_flags(dtree, name, dlid, -1, -1,
							  read_only_lv(lv, laopts, layer),
							  ((lv->vg->status & PRECOMMITTED) | laopts->revert) ? 1 : 0,
							  lvlayer,
							  _get_udev_flags(dm, lv, layer, laopts->noscan,
									  laopts->temporary, 0))))
		return_0;

	if (dm->track_pending_delete) {
		log_debug_activation("Using error for pending delete of %s-%s.",
				     display_lvname(lv), layer);
		if (!dm_tree_node_add_error_target(dnode, size))
			return_0;
	} else {
		/* add load_segment to meta dnode: linear, size of meta area */
		if (!add_linear_area_to_dtree(dnode, size, lv->vg->extent_size,
					      lv->vg->cmd->use_linear_target,
					      lv->vg->name, lv->name))
			return_0;

		if (!(dlid_pool = build_dm_uuid(dm->mem, pool_lv, NULL)))
			return_0;

		/* add seg_area to prev load_seg: offset 0 maps to cachevol lv offset 0 */
		if (!dm_tree_node_add_target_area(dnode, NULL, dlid_pool,
						  meta_or_data ? 0 : lvseg->metadata_len))
			return_0;
	}

	return 1;
}
static int _add_segment_to_dtree(struct dev_manager *dm,
				 struct dm_tree *dtree,
				 struct dm_tree_node *dnode,
				 struct lv_segment *seg,
				 struct lv_activate_opts *laopts,
				 const char *layer)
{
	uint32_t s;
	struct lv_segment *seg_present;
	const struct segment_type *segtype;
	const char *target_name;

	/* Ensure required device-mapper targets are loaded */
	seg_present = find_snapshot(seg->lv) ? : seg;
	segtype = seg_present->segtype;

	target_name = (segtype->ops->target_name ?
		       segtype->ops->target_name(seg_present, laopts) :
		       segtype->name);

	log_debug_activation("Checking kernel supports %s segment type for %s%s%s",
			     target_name, display_lvname(seg->lv),
			     layer ? "-" : "", layer ? : "");

	if (segtype->ops->target_present &&
	    !segtype->ops->target_present(seg_present->lv->vg->cmd,
					  seg_present, NULL)) {
		log_error("Can't process LV %s: %s target support missing "
			  "from kernel?", display_lvname(seg->lv), target_name);
		return 0;
	}

	/* Add external origin layer */
	if (seg->external_lv &&
	    !_add_new_external_lv_to_dtree(dm, dtree, seg->external_lv, laopts))
		return_0;

	/* Add mirror log */
	if (seg->log_lv &&
	    !_add_new_lv_to_dtree(dm, dtree, seg->log_lv, laopts, NULL))
		return_0;

	/* Add pool metadata */
	if (seg->metadata_lv &&
	    !_add_new_lv_to_dtree(dm, dtree, seg->metadata_lv, laopts, NULL))
		return_0;

	/* Add pool layer */
	if (seg->pool_lv && !laopts->origin_only &&
	    !_add_new_lv_to_dtree(dm, dtree, seg->pool_lv, laopts,
				  lv_layer(seg->pool_lv)))
		return_0;

	if (seg->writecache && !laopts->origin_only &&
	    !_add_new_lv_to_dtree(dm, dtree, seg->writecache, laopts,
				  lv_layer(seg->writecache)))
		return_0;

	if (seg->integrity_meta_dev && !laopts->origin_only &&
	    !_add_new_lv_to_dtree(dm, dtree, seg->integrity_meta_dev, laopts,
				  lv_layer(seg->integrity_meta_dev)))
		return_0;

	/* Add any LVs used by this segment */
	for (s = 0; s < seg->area_count; ++s) {
		if ((seg_type(seg, s) == AREA_LV) &&
		    /* do not bring up tracked image */
		    !lv_is_raid_image_with_tracking(seg_lv(seg, s)) &&
		    /* origin only for cache without pending delete */
		    (!dm->track_pending_delete || !seg_is_cache(seg)) &&
		    !_add_new_lv_to_dtree(dm, dtree, seg_lv(seg, s),
					  laopts,
					  lv_is_vdo_pool(seg_lv(seg, s)) ?
					  lv_layer(seg_lv(seg, s)) : NULL))
			return_0;

		if (seg_is_raid_with_meta(seg) && seg->meta_areas && seg_metalv(seg, s) &&
		    !lv_is_raid_image_with_tracking(seg_lv(seg, s)) &&
		    !_add_new_lv_to_dtree(dm, dtree, seg_metalv(seg, s),
					  laopts, NULL))
			return_0;
	}

	if (dm->track_pending_delete) {
		/* Replace target and all its used devs with error mapping */
		log_debug_activation("Using error for pending delete %s.",
				     display_lvname(seg->lv));
		if (!dm_tree_node_add_error_target(dnode, (uint64_t) seg->lv->vg->extent_size * _seg_len(seg)))
			return_0;
	} else if (!_add_target_to_dtree(dm, dnode, seg, laopts))
		return_0;

	return 1;
}
static int _add_new_lv_to_dtree(struct dev_manager *dm, struct dm_tree *dtree,
				const struct logical_volume *lv, struct lv_activate_opts *laopts,
				const char *layer)
{
	struct lv_segment *seg;
	struct lv_layer *lvlayer;
	struct seg_list *sl;
	struct dm_list *snh;
	struct dm_tree_node *dnode;
	const struct dm_info *dinfo;
	char *name, *dlid;
	uint32_t max_stripe_size = UINT32_C(0);
	uint32_t read_ahead = lv->read_ahead;
	uint32_t read_ahead_flags = UINT32_C(0);
	int save_pending_delete = dm->track_pending_delete;
	int merge_in_progress = 0;

	if (!(lvlayer = dm_pool_alloc(dm->mem, sizeof(*lvlayer)))) {
		log_error("_add_new_lv_to_dtree: pool alloc failed for %s %s.",
			  display_lvname(lv), layer);
		return 0;
	}

	lvlayer->lv = lv;
	lvlayer->visible_component = (laopts->component_lv == lv) ? 1 : 0;

	log_debug_activation("Adding new LV %s%s%s to dtree", display_lvname(lv),
			     layer ? "-" : "", layer ? : "");

	/* An LV with a pending delete is never newly put into a table */
	if (lv_is_pending_delete(lv) && !_cached_dm_info(dm->mem, dtree, lv, NULL))
		return 1; /* Replace with error only when already exists */

	if (lv_is_cache_pool(lv) &&
	    !dm_list_empty(&lv->segs_using_this_lv)) {
		/* cache pool is 'meta' LV and does not have a real device node */
		if (!_add_new_lv_to_dtree(dm, dtree, seg_lv(first_seg(lv), 0), laopts, NULL))
			return_0;
		if (!_add_new_lv_to_dtree(dm, dtree, first_seg(lv)->metadata_lv, laopts, NULL))
			return_0;
		return 1;
	}
	/* FIXME Seek a simpler way to lay out the snapshot-merge tree. */
	if (!layer && lv_is_merging_origin(lv)) {
		seg = find_snapshot(lv);
		/*
		 * Prevent merge if merge isn't currently possible:
		 * either origin or merging snapshot are open
		 * - for old snaps use "snapshot-merge" if it is already in use
		 * - open_count is always retrieved (as of dm-ioctl 4.7.0)
		 *   so just use the tree's existing nodes' info
		 */
		if ((dinfo = _cached_dm_info(dm->mem, dtree, lv, NULL))) {
			/* Merging origin LV is present, check if merging is already running. */
			if ((seg_is_thin_volume(seg) && _lv_has_thin_device_id(dm->mem, lv, NULL, seg->device_id)) ||
			    (!seg_is_thin_volume(seg) && lv_has_target_type(dm->mem, lv, NULL, TARGET_NAME_SNAPSHOT_MERGE))) {
				log_debug_activation("Merging of snapshot volume %s to origin %s is in progress.",
						     display_lvname(seg->lv), display_lvname(seg->lv));
				merge_in_progress = 1; /* Merge is already running */
			} /* Merge is not yet running, so check if it can be started */
			else if (laopts->resuming) {
				log_debug_activation("Postponing pending snapshot merge for origin %s, "
						     "merge was not started before suspend.",
						     display_lvname(lv));
				laopts->no_merging = 1; /* Cannot be reloaded in suspend */
			} /* Non-resuming merge requires origin to be unused */
			else if (dinfo->open_count) {
				log_debug_activation("Postponing pending snapshot merge for origin %s, "
						     "origin volume is opened.",
						     display_lvname(lv));
				laopts->no_merging = 1;
			}
		}

		/* If merge is still undecided, check the snapshot side as well */
		if (!merge_in_progress && !laopts->no_merging &&
		    (dinfo = _cached_dm_info(dm->mem, dtree,
					     seg_is_thin_volume(seg) ?
					     seg->lv : seg->cow, NULL))) {
			if (seg_is_thin_volume(seg)) {
				/* Active thin snapshot prevents merge */
				log_debug_activation("Postponing pending snapshot merge for origin volume %s, "
						     "merging thin snapshot volume %s is active.",
						     display_lvname(lv), display_lvname(seg->lv));
				laopts->no_merging = 1;
			} else if (dinfo->open_count) {
				log_debug_activation("Postponing pending snapshot merge for origin volume %s, "
						     "merging snapshot volume %s is opened.",
						     display_lvname(lv), display_lvname(seg->lv));
				laopts->no_merging = 1;
			}
		}
	}
	if (!(name = dm_build_dm_name(dm->mem, lv->vg->name, lv->name, layer)))
		return_0;

	/* Even an unused thin-pool still needs to get a layered UUID -suffix */
	if (!layer && lv_is_new_thin_pool(lv))
		layer = lv_layer(lv);

	/* Adds -real to the dm uuid of wcorig LV. */
	if (!layer && lv_is_writecache_origin(lv))
		layer = lv_layer(lv); /* "real" */

	/*
	 * FIXME: we would like to have -private suffixes used for devices not processed by udev,
	 * however ATM we also sometimes want to provide /dev/vg/lv symlinks to such devices
	 * and also be able to correctly report their status with lvs.
	 *
	 * Until these problems are resolved this code path needs to be disabled.
	 */
	if (0 && lvlayer->visible_component) {
		/* Component LV will be public, do not add any layer suffixes */
		if (!(dlid = dm_build_dm_uuid(dm->mem, UUID_PREFIX, lv->lvid.s, NULL)))
			return_0;
	} else if (!(dlid = build_dm_uuid(dm->mem, lv, layer)))
		return_0;

	/* We've already processed this node if it already has a context ptr */
	if ((dnode = dm_tree_find_node_by_uuid(dtree, dlid)) &&
	    dm_tree_node_get_context(dnode))
		return 1;

	/*
	 * Add LV to dtree.
	 * If we're working with precommitted metadata, clear any
	 * existing inactive table left behind.
	 * Major/minor settings only apply to the visible layer.
	 */
	/* FIXME Move the clear from here until later, so we can leave
	 * identical inactive tables untouched. (For pvmove.)
	 */
	if (!(dnode = dm_tree_add_new_dev_with_udev_flags(dtree, name, dlid,
							  layer ? UINT32_C(0) : (uint32_t) lv->major,
							  layer ? UINT32_C(0) : (uint32_t) lv->minor,
							  read_only_lv(lv, laopts, layer),
							  ((lv->vg->status & PRECOMMITTED) | laopts->revert) ? 1 : 0,
							  lvlayer,
							  _get_udev_flags(dm, lv, layer, laopts->noscan, laopts->temporary,
									  lvlayer->visible_component))))
		return_0;

	/* Store existing name so we can do rename later */
	lvlayer->old_name = dm_tree_node_get_name(dnode);

	/* Create table */
	dm->pvmove_mirror_count = 0u;

	if (lv_is_pending_delete(lv))
		/* Handle LVs with pending delete */
		/* For now used only by cache segtype, TODO snapshots */
		dm->track_pending_delete = 1;

	if (lv_is_cache_vol(lv))
		dm_list_iterate_items(sl, &lv->segs_using_this_lv)
			if (lv_is_cache(sl->seg->lv) &&
			    /* Cachevol is used by cache LV segment -> add cvol-cdata/cmeta extra layer */
			    (!_add_new_cvol_subdev_to_dtree(dm, dtree, sl->seg->lv, laopts, lvlayer, 0) ||
			     !_add_new_cvol_subdev_to_dtree(dm, dtree, sl->seg->lv, laopts, lvlayer, 1)))
				return_0;
	/* This is an unused cache-pool - make metadata accessible */
	if (lv_is_cache_pool(lv))
		lv = first_seg(lv)->metadata_lv;

	/* If this is a snapshot origin, add real LV */
	/* If this is a snapshot origin + merging snapshot, add cow + real LV */
	/* Snapshot origin could be also external origin */
	if (lv_is_origin(lv) && !layer) {
		if (!_add_new_lv_to_dtree(dm, dtree, lv, laopts, "real"))
			return_0;
		if (!laopts->no_merging && lv_is_merging_origin(lv)) {
			if (!_add_new_lv_to_dtree(dm, dtree,
						  find_snapshot(lv)->cow, laopts, "cow"))
				return_0;
			/*
			 * Must also add "real" LV for use when
			 * snapshot-merge target is added
			 */
			if (!_add_snapshot_merge_target_to_dtree(dm, dnode, lv))
				return_0;
		} else if (!_add_origin_target_to_dtree(dm, dnode, lv))
			return_0;

		/* Add any snapshots of this LV */
		dm_list_iterate(snh, &lv->snapshot_segs)
			if (!_add_new_lv_to_dtree(dm, dtree,
						  dm_list_struct_base(snh, struct lv_segment,
								      origin_list)->cow,
						  laopts, NULL))
				return_0;
	} else if (lv_is_cow(lv) && !layer) {
		if (!_add_new_lv_to_dtree(dm, dtree, lv, laopts, "cow"))
			return_0;
		if (!_add_snapshot_target_to_dtree(dm, dnode, lv, laopts))
			return_0;
	} else if (!layer && ((lv_is_thin_pool(lv) && !lv_is_new_thin_pool(lv)) ||
			      lv_is_vdo_pool(lv) ||
			      lv_is_external_origin(lv))) {
		/* External origin or 'used' thin pool or VDO pool is using a layer */
		if (!_add_new_lv_to_dtree(dm, dtree, lv, laopts, lv_layer(lv)))
			return_0;
		if (!_add_layer_target_to_dtree(dm, dnode, lv))
			return_0;
	} else {
		/* Add 'real' segments for LVs */
		dm_list_iterate_items(seg, &lv->segments) {
			if (!_add_segment_to_dtree(dm, dtree, dnode, seg, laopts, layer))
				return_0;
			if (max_stripe_size < seg->stripe_size * seg->area_count)
				max_stripe_size = seg->stripe_size * seg->area_count;
		}

		if (!layer && lv_is_vdo_pool(lv) &&
		    !_add_layer_target_to_dtree(dm, dnode, lv))
			return_0;
	}
	/* Set up thin pool callback */
	if (lv_is_thin_pool(lv) && layer &&
	    !_pool_register_callback(dm, dnode, lv))
		return_0;

	if (lv_is_cache(lv) && !lv_is_cache_vol(first_seg(lv)->pool_lv) &&
	    /* Register callback only for layer activation or non-layered cache LV */
	    (layer || !lv_layer(lv)) &&
	    /* Register callback when metadata LV is NOT already active */
	    !_cached_dm_info(dm->mem, dtree, first_seg(first_seg(lv)->pool_lv)->metadata_lv, NULL) &&
	    !_pool_register_callback(dm, dnode, lv))
		return_0;

	if (lv_is_cache(lv) && lv_is_cache_vol(first_seg(lv)->pool_lv) &&
	    /* Register callback only for layer activation or non-layered cache LV */
	    (layer || !lv_layer(lv)) &&
	    /* Register callback when cachevol LV is NOT already active */
	    !_cached_dm_info(dm->mem, dtree, first_seg(lv)->pool_lv, NULL) &&
	    !_pool_register_callback(dm, dnode, lv))
		return_0;

	/*
	 * Update tables for ANY PVMOVE holders for an active LV where the name starts with 'pvmove',
	 * but it's no longer a PVMOVE LV and also not a PVMOVE _mimage LV.
	 * When resume happens, tables MUST already be preloaded with correct entries!
	 * (since we can't preload a different table while devices are suspended)
	 */
	if (!lv_is_pvmove(lv) && !strncmp(lv->name, "pvmove", 6) && !strchr(lv->name, '_') &&
	    (dinfo = _cached_dm_info(dm->mem, dtree, lv, NULL)))
		if (!_add_holders_to_dtree(dm, dtree, lv, laopts, dinfo))
			return_0;

	if (read_ahead == DM_READ_AHEAD_AUTO) {
		/* we need RA at least twice a whole stripe - see the comment in md/raid0.c */
		read_ahead = max_stripe_size * 2;
		/* FIXME: layered device read-ahead */
		if (!read_ahead)
			lv_calculate_readahead(lv, &read_ahead);
		read_ahead_flags = DM_READ_AHEAD_MINIMUM_FLAG;
	}

	dm_tree_node_set_read_ahead(dnode, read_ahead, read_ahead_flags);

	/* Add any LVs referencing a PVMOVE LV unless told not to */
	if (dm->track_pvmove_deps && lv_is_pvmove(lv))
		dm_list_iterate_items(sl, &lv->segs_using_this_lv)
			if (!_add_new_lv_to_dtree(dm, dtree, sl->seg->lv, laopts, NULL))
				return_0;

	dm->track_pending_delete = save_pending_delete; /* restore */

	return 1;
}
/* FIXME: symlinks should be created/destroyed at the same time
 * as the kernel devices but we can't do that from within libdevmapper
 * at present so we must walk the tree twice instead. */

/*
 * Create LV symlinks for children of supplied root node.
 */
static int _create_lv_symlinks(struct dev_manager *dm, struct dm_tree_node *root)
{
	void *handle = NULL;
	struct dm_tree_node *child;
	struct lv_layer *lvlayer;
	char *old_vgname, *old_lvname, *old_layer;
	char *new_vgname, *new_lvname, *new_layer;
	const char *name;
	int r = 1;

	/* Nothing to do if udev fallback is disabled. */
	if (!_check_udev_fallback(dm->cmd)) {
		fs_set_create();
		return 1;
	}

	while ((child = dm_tree_next_child(&handle, root, 0))) {
		if (!(lvlayer = dm_tree_node_get_context(child)))
			continue;

		/* Detect rename */
		name = dm_tree_node_get_name(child);

		if (name && lvlayer->old_name && *lvlayer->old_name && strcmp(name, lvlayer->old_name)) {
			if (!dm_split_lvm_name(dm->mem, lvlayer->old_name, &old_vgname, &old_lvname, &old_layer)) {
				log_error("_create_lv_symlinks: Couldn't split up old device name %s.", lvlayer->old_name);
				return 0;
			}
			if (!dm_split_lvm_name(dm->mem, name, &new_vgname, &new_lvname, &new_layer)) {
				log_error("_create_lv_symlinks: Couldn't split up new device name %s.", name);
				return 0;
			}
			if (!fs_rename_lv(lvlayer->lv, name, old_vgname, old_lvname))
				r = 0;
			continue;
		}

		if (_lv_has_mknode(lvlayer->lv) || lvlayer->visible_component) {
			if (!_dev_manager_lv_mknodes(lvlayer->lv))
				r = 0;
			continue;
		}

		if (!_dev_manager_lv_rmnodes(lvlayer->lv))
			r = 0;
	}

	return r;
}
/*
 * Remove LV symlinks for children of supplied root node.
 */
static int _remove_lv_symlinks(struct dev_manager *dm, struct dm_tree_node *root)
{
	void *handle = NULL;
	struct dm_tree_node *child;
	char *vgname, *lvname, *layer;
	int r = 1;

	/* Nothing to do if udev fallback is disabled. */
	if (!_check_udev_fallback(dm->cmd))
		return 1;

	while ((child = dm_tree_next_child(&handle, root, 0))) {
		if (!dm_split_lvm_name(dm->mem, dm_tree_node_get_name(child), &vgname, &lvname, &layer)) {
			r = 0;
			continue;
		}

		if (!*vgname)
			continue;

		/* only top level layer has symlinks */
		if (*layer)
			continue;

		fs_del_lv_byname(dm->cmd->dev_dir, vgname, lvname,
				 dm->cmd->current_settings.udev_rules);
	}

	return r;
}

static int _clean_tree(struct dev_manager *dm, struct dm_tree_node *root,
		       const char *non_toplevel_tree_dlid)
{
	void *handle = NULL;
	struct dm_tree_node *child;
	char *vgname, *lvname, *layer;
	const char *name, *uuid;
	struct dm_str_list *dl;

	while ((child = dm_tree_next_child(&handle, root, 0))) {
		if (!(name = dm_tree_node_get_name(child)))
			continue;

		if (!(uuid = dm_tree_node_get_uuid(child)))
			continue;

		if (!dm_split_lvm_name(dm->mem, name, &vgname, &lvname, &layer)) {
			log_error("_clean_tree: Couldn't split up device name %s.", name);
			return 0;
		}

		/* Not meant to be top level? */
		if (!*layer)
			continue;

		/* If operation was performed on a partial tree, don't remove it */
		if (non_toplevel_tree_dlid && !strcmp(non_toplevel_tree_dlid, uuid))
			continue;

		if (!(uuid = dm_pool_strdup(dm->cmd->pending_delete_mem, uuid))) {
			log_error("_clean_tree: Failed to duplicate uuid.");
			return 0;
		}

		if (!str_list_add(dm->cmd->pending_delete_mem, &dm->cmd->pending_delete, uuid))
			return_0;
	}

	/* Deactivate any tracked pending delete nodes */
	if (!dm_list_empty(&dm->cmd->pending_delete) && !dm_get_suspended_counter()) {
		fs_unlock();
		dm_tree_set_cookie(root, fs_get_cookie());
		dm_list_iterate_items(dl, &dm->cmd->pending_delete) {
			log_debug_activation("Deleting tracked UUID %s.", dl->str);
			if (!dm_tree_deactivate_children(root, dl->str, strlen(dl->str)))
				return_0;
		}

		dm_list_init(&dm->cmd->pending_delete);
		dm_pool_empty(dm->cmd->pending_delete_mem);
	}

	return 1;
}

static int _tree_action(struct dev_manager *dm, const struct logical_volume *lv,
			struct lv_activate_opts *laopts, action_t action)
{
	static const char _action_names[][24] = {
		"PRELOAD", "ACTIVATE", "DEACTIVATE", "SUSPEND", "SUSPEND_WITH_LOCKFS", "CLEAN"
	};
	const size_t DLID_SIZE = ID_LEN + sizeof(UUID_PREFIX) - 1;
	struct dm_tree *dtree;
	struct dm_tree_node *root;
	char *dlid;
	int r = 0;

	if (action < DM_ARRAY_SIZE(_action_names))
		log_debug_activation("Creating %s%s tree for %s.",
				     _action_names[action],
				     (laopts->origin_only) ? " origin-only" : "",
				     display_lvname(lv));

	/* Some LVs cannot be used as a top level tree */
	/* TODO: add more.... */
	if (lv_is_cache_pool(lv) && !dm_list_empty(&lv->segs_using_this_lv)) {
		log_error(INTERNAL_ERROR "Cannot create tree for %s.",
			  display_lvname(lv));
		return 0;
	}

	/* Some targets may build a bigger tree for activation */
	dm->activation = ((action == PRELOAD) || (action == ACTIVATE));
	dm->suspend = (action == SUSPEND_WITH_LOCKFS) || (action == SUSPEND);

	/* Drop any cache before DM table manipulation within locked section
	 * TODO: check if it makes sense to manage cache within lock */
	dm_devs_cache_destroy();

	dtree = _create_partial_dtree(dm, lv, laopts->origin_only);

	if (!dtree)
		return_0;

	if (!(root = dm_tree_find_node(dtree, 0, 0))) {
		log_error("Lost dependency tree root node.");
		goto out_no_root;
	}

	/* Restore fs cookie */
	dm_tree_set_cookie(root, fs_get_cookie());

	if (!(dlid = build_dm_uuid(dm->mem, lv, laopts->origin_only ? lv_layer(lv) : NULL)))
		goto_out;

	/* Only process nodes with uuid of "LVM-" plus VG id. */
	switch (action) {
	case CLEAN:
		if (retry_deactivation())
			dm_tree_retry_remove(root);
		/* Deactivate any unused non-toplevel nodes */
		if (!_clean_tree(dm, root, laopts->origin_only ? dlid : NULL))
			goto_out;
		break;
	case DEACTIVATE:
		if (retry_deactivation())
			dm_tree_retry_remove(root);
		/* Deactivate LV and all devices it references that nothing else has open. */
		if (!dm_tree_deactivate_children(root, dlid, DLID_SIZE))
			goto_out;
		if (!_remove_lv_symlinks(dm, root))
			log_warn("Failed to remove all device symlinks associated with %s.",
				 display_lvname(lv));
		break;
	case SUSPEND:
		dm_tree_skip_lockfs(root);
		if (!dm->flush_required)
			dm_tree_use_no_flush_suspend(root);
		/* Fall through */
	case SUSPEND_WITH_LOCKFS:
		if (!dm_tree_suspend_children(root, dlid, DLID_SIZE))
			goto_out;
		break;
	case PRELOAD:
	case ACTIVATE:
		/* Add all required new devices to tree */
		if (!_add_new_lv_to_dtree(dm, dtree, lv, laopts,
					  (lv_is_origin(lv) && laopts->origin_only) ? "real" :
					  (laopts->origin_only &&
					   (lv_is_thin_pool(lv) ||
					    lv_is_vdo_pool(lv))) ?
					  lv_layer(lv) : NULL))
			goto_out;

		/* Preload any devices required before any suspensions */
		if (!dm_tree_preload_children(root, dlid, DLID_SIZE))
			goto_out;

		if ((dm_tree_node_size_changed(root) < 0))
			dm->flush_required = 1;
		/* Currently the code requires a flush for any size increase
		 * of a non thin/vdo pool or volume */
		else if (!lv_is_thin_volume(lv) &&
			 !lv_is_thin_pool(lv) &&
			 !lv_is_vdo(lv) &&
			 !lv_is_vdo_pool(lv) &&
			 dm_tree_node_size_changed(root))
			dm->flush_required = 1;

		if (action == ACTIVATE) {
			if (!dm_tree_activate_children(root, dlid, DLID_SIZE))
				goto_out;
			if (!_create_lv_symlinks(dm, root))
				log_warn("Failed to create symlinks for %s.",
					 display_lvname(lv));
		}

		break;
	default:
		log_error(INTERNAL_ERROR "_tree_action: Action %u not supported.", action);
		goto out;
	}

	r = 1;
out:
	/* Save fs cookie for udev settle, do not wait here */
	fs_set_cookie(dm_tree_get_cookie(root));
out_no_root:
	dm_tree_free(dtree);

	return r;
}

/* origin_only may only be set if we are resuming (not activating) an origin LV */
int dev_manager_activate(struct dev_manager *dm, const struct logical_volume *lv,
			 struct lv_activate_opts *laopts)
{
	if (!_tree_action(dm, lv, laopts, ACTIVATE))
		return_0;

	if (!_tree_action(dm, lv, laopts, CLEAN))
		return_0;

	return 1;
}

/* origin_only may only be set if we are resuming (not activating) an origin LV */
int dev_manager_preload(struct dev_manager *dm, const struct logical_volume *lv,
			struct lv_activate_opts *laopts, int *flush_required)
{
	dm->flush_required = *flush_required;

	if (!_tree_action(dm, lv, laopts, PRELOAD))
		return_0;

	*flush_required = dm->flush_required;

	return 1;
}

int dev_manager_deactivate(struct dev_manager *dm, const struct logical_volume *lv)
{
	struct lv_activate_opts laopts = { 0 };

	if (!_tree_action(dm, lv, &laopts, DEACTIVATE))
		return_0;

	return 1;
}

int dev_manager_suspend(struct dev_manager *dm, const struct logical_volume *lv,
			struct lv_activate_opts *laopts, int lockfs, int flush_required)
{
	dm->flush_required = flush_required;

	if (!_tree_action(dm, lv, laopts, lockfs ? SUSPEND_WITH_LOCKFS : SUSPEND))
		return_0;

	return 1;
}

/*
 * Does device use VG somewhere in its construction?
 * Returns 1 if uncertain.
 */
int dev_manager_device_uses_vg(struct device *dev,
			       struct volume_group *vg)
{
	struct dm_tree *dtree;
	struct dm_tree_node *root;
	char dlid[sizeof(UUID_PREFIX) + sizeof(struct id) - 1] __attribute__((aligned(8)));
	int r = 1;

	if (!(dtree = dm_tree_create())) {
		log_error("Failed to create partial dtree.");
		return r;
	}

	dm_tree_set_optional_uuid_suffixes(dtree, (const char **) _uuid_suffix_list);

	if (!dm_tree_add_dev(dtree, MAJOR(dev->dev), MINOR(dev->dev))) {
		log_error("Failed to add device %s (%u:%u) to dtree.",
			  dev_name(dev), MAJOR(dev->dev), MINOR(dev->dev));
		goto out;
	}

	memcpy(dlid, UUID_PREFIX, sizeof(UUID_PREFIX) - 1);
	memcpy(dlid + sizeof(UUID_PREFIX) - 1, &vg->id.uuid[0], sizeof(vg->id));

	if (!(root = dm_tree_find_node(dtree, 0, 0))) {
		log_error("Lost dependency tree root node.");
		goto out;
	}

	if (dm_tree_children_use_uuid(root, dlid, sizeof(UUID_PREFIX) + sizeof(vg->id) - 1))
		goto_out;

	r = 0;
out:
	dm_tree_free(dtree);

	return r;
}

/*
 * The crypt offset is usually the LUKS header size but can be larger.
 * The LUKS header is usually 2MB for LUKS1 and 16MB for LUKS2.
 * The offset needs to be subtracted from the LV size to get the
 * size used to resize the crypt device.
 */
int get_crypt_table_offset(dev_t crypt_devt, uint32_t *offset_bytes)
{
	struct dm_task *dmt;
	uint64_t start, length;
	char *target_type = NULL;
	void *next = NULL;
	char *params = NULL;
	char offset_str[32] = { 0 };
	int copy_offset = 0;
	int spaces = 0;
	unsigned i, i_off = 0;
	if (!(dmt = _setup_task_run(DM_DEVICE_TABLE, NULL, NULL, NULL, NULL,
				    MAJOR(crypt_devt), MINOR(crypt_devt), 0, 0, 0)))
		return_0;

	next = dm_get_next_target(dmt, next, &start, &length, &target_type, &params);

	if (!target_type || !params || strcmp(target_type, "crypt")) {
		dm_task_destroy(dmt);
		return_0;
	}

	/*
	 * Get offset from params string:
	 * <cipher> <key> <iv_offset> <device> <offset> [<#opt_params> <opt_params>]
	 * <offset> is reported in 512 byte sectors.
	 */
	for (i = 0; params[i]; i++) {
		if (params[i] == ' ') {
			spaces++;
			if (spaces == 4)
				copy_offset = 1;
			if (spaces == 5)
				break;
			continue;
		}
		if (!copy_offset)
			continue;
		offset_str[i_off++] = params[i];
		if (i_off == sizeof(offset_str)) {
			offset_str[0] = '\0';
			break;
		}
	}

	dm_task_destroy(dmt);

	if (!offset_str[0])
		return_0;

	*offset_bytes = ((uint32_t) strtoul(offset_str, NULL, 0) * 512);
	return 1;
}