1
0
mirror of git://sourceware.org/git/lvm2.git synced 2025-01-19 14:04:17 +03:00
lvm2/libdm/libdm-deptree.c

4104 lines
104 KiB
C
Raw Normal View History

/*
* Copyright (C) 2005-2014 Red Hat, Inc. All rights reserved.
*
* This file is part of the device-mapper userspace tools.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License v.2.1.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "dmlib.h"
#include "libdm-targets.h"
#include "libdm-common.h"
#include "kdev_t.h"
#include "dm-ioctl.h"
#include <stdarg.h>
#include <sys/param.h>
#include <sys/utsname.h>
/*
 * Upper bound applied to target parameter data.
 * NOTE(review): presumably a size in bytes for the parameter string built
 * per target; the code using it is outside this view - confirm.
 */
#define MAX_TARGET_PARAMSIZE 500000
/*
 * NOTE(review): presumably the replicator site index that denotes the local
 * site (compare rsite_index fields); the code using it is outside this
 * view - confirm.
 */
#define REPLICATOR_LOCAL_SITE 0
/*
 * Supported segment types.
 *
 * Values are assigned sequentially starting from zero; the order here is
 * the same as the order of the leading entries of _dm_segtypes[].
 */
enum {
	SEG_CACHE = 0,
	SEG_CRYPT,
	SEG_ERROR,
	SEG_LINEAR,
	SEG_MIRRORED,
	SEG_REPLICATOR,
	SEG_REPLICATOR_DEV,
	SEG_SNAPSHOT,
	SEG_SNAPSHOT_ORIGIN,
	SEG_SNAPSHOT_MERGE,
	SEG_STRIPED,
	SEG_ZERO,
	SEG_THIN_POOL,
	SEG_THIN,

	/* raid personalities */
	SEG_RAID1,
	SEG_RAID10,
	SEG_RAID4,
	SEG_RAID5_LA,
	SEG_RAID5_RA,
	SEG_RAID5_LS,
	SEG_RAID5_RS,
	SEG_RAID6_ZR,
	SEG_RAID6_NR,
	SEG_RAID6_NC,
};
2005-11-09 14:10:50 +00:00
/* FIXME Add crypt and multipath support */
static const struct {
unsigned type;
const char target[16];
} _dm_segtypes[] = {
{ SEG_CACHE, "cache" },
{ SEG_CRYPT, "crypt" },
{ SEG_ERROR, "error" },
{ SEG_LINEAR, "linear" },
{ SEG_MIRRORED, "mirror" },
{ SEG_REPLICATOR, "replicator" },
{ SEG_REPLICATOR_DEV, "replicator-dev" },
{ SEG_SNAPSHOT, "snapshot" },
{ SEG_SNAPSHOT_ORIGIN, "snapshot-origin" },
{ SEG_SNAPSHOT_MERGE, "snapshot-merge" },
{ SEG_STRIPED, "striped" },
{ SEG_ZERO, "zero"},
{ SEG_THIN_POOL, "thin-pool"},
{ SEG_THIN, "thin"},
{ SEG_RAID1, "raid1"},
{ SEG_RAID10, "raid10"},
{ SEG_RAID4, "raid4"},
{ SEG_RAID5_LA, "raid5_la"},
{ SEG_RAID5_RA, "raid5_ra"},
{ SEG_RAID5_LS, "raid5_ls"},
{ SEG_RAID5_RS, "raid5_rs"},
{ SEG_RAID6_ZR, "raid6_zr"},
{ SEG_RAID6_NR, "raid6_nr"},
{ SEG_RAID6_NC, "raid6_nc"},
/*
*WARNING: Since 'raid' target overloads this 1:1 mapping table
* for search do not add new enum elements past them!
*/
{ SEG_RAID5_LS, "raid5"}, /* same as "raid5_ls" (default for MD also) */
{ SEG_RAID6_ZR, "raid6"}, /* same as "raid6_zr" */
};
/*
 * Some segment types have a list of areas of other devices attached.
 * One seg_area describes a single such area.
 */
struct seg_area {
	struct dm_list list;		/* List linkage (presumably into load_segment.areas - confirm) */

	struct dm_tree_node *dev_node;	/* Tree node of the device backing this area */

	uint64_t offset;		/* Offset of the area (units not visible here) */

	unsigned rsite_index;		/* Replicator site index */
	struct dm_tree_node *slog;	/* Replicator sync log node */
	uint64_t region_size;		/* Replicator sync log size */
	uint32_t flags;			/* Replicator sync log flags */
};
/*
 * A single thin message: a type tag plus the arguments for that type.
 * NOTE(review): which union member is valid is presumably selected by
 * 'type'; the code consuming it is outside this view.
 */
struct dm_thin_message {
	dm_thin_message_t type;

	union {
		struct {
			uint32_t device_id;
			uint32_t origin_id;
		} m_create_snap;

		struct {
			uint32_t device_id;
		} m_create_thin;

		struct {
			uint32_t device_id;
		} m_delete;

		struct {
			uint64_t current_id;
			uint64_t new_id;
		} m_set_transaction_id;
	} u;
};
/*
 * List element wrapping one dm_thin_message.
 */
struct thin_message {
	struct dm_list list;		/* List linkage (presumably load_segment.thin_messages - confirm) */
	struct dm_thin_message message;	/* The message itself */
	int expected_errno;		/* NOTE(review): presumably an errno value tolerated when sending - confirm */
};
/*
 * Replicator-log has a list of sites; one replicator_site describes a
 * single site.
 */
/* FIXME: maybe move to seg_area too? */
struct replicator_site {
	struct dm_list list;		/* List linkage (presumably load_segment.rsites - confirm) */

	unsigned rsite_index;
	dm_replicator_mode_t mode;

	uint32_t async_timeout;
	uint32_t fall_behind_ios;
	uint64_t fall_behind_data;
};
/* Per-segment properties */
struct load_segment {
struct dm_list list;
unsigned type;
uint64_t size;
unsigned area_count; /* Linear + Striped + Mirrored + Crypt + Replicator */
struct dm_list areas; /* Linear + Striped + Mirrored + Crypt + Replicator */
uint32_t stripe_size; /* Striped + raid */
int persistent; /* Snapshot */
uint32_t chunk_size; /* Snapshot + cache */
2005-11-09 14:10:50 +00:00
struct dm_tree_node *cow; /* Snapshot */
struct dm_tree_node *origin; /* Snapshot + Snapshot origin + Cache */
struct dm_tree_node *merge; /* Snapshot */
struct dm_tree_node *log; /* Mirror + Replicator */
uint32_t region_size; /* Mirror + raid */
unsigned clustered; /* Mirror */
unsigned mirror_area_count; /* Mirror */
uint32_t flags; /* Mirror + raid + Cache */
2006-02-06 20:18:10 +00:00
char *uuid; /* Clustered mirror log */
const char *policy_name; /* Cache */
unsigned policy_argc; /* Cache */
struct dm_config_node *policy_settings; /* Cache */
const char *cipher; /* Crypt */
const char *chainmode; /* Crypt */
const char *iv; /* Crypt */
uint64_t iv_offset; /* Crypt */
const char *key; /* Crypt */
const char *rlog_type; /* Replicator */
struct dm_list rsites; /* Replicator */
unsigned rsite_count; /* Replicator */
unsigned rdevice_count; /* Replicator */
struct dm_tree_node *replicator;/* Replicator-dev */
uint64_t rdevice_index; /* Replicator-dev */
uint64_t rebuilds; /* raid */
uint64_t writemostly; /* raid */
uint32_t writebehind; /* raid */
uint32_t max_recovery_rate; /* raid kB/sec/disk */
uint32_t min_recovery_rate; /* raid kB/sec/disk */
struct dm_tree_node *metadata; /* Thin_pool + Cache */
struct dm_tree_node *pool; /* Thin_pool, Thin */
struct dm_tree_node *external; /* Thin */
struct dm_list thin_messages; /* Thin_pool */
uint64_t transaction_id; /* Thin_pool */
uint64_t low_water_mark; /* Thin_pool */
uint32_t data_block_size; /* Thin_pool */
unsigned skip_block_zeroing; /* Thin_pool */
unsigned ignore_discard; /* Thin_pool target vsn 1.1 */
unsigned no_discard_passdown; /* Thin_pool target vsn 1.1 */
unsigned error_if_no_space; /* Thin pool target vsn 1.10 */
uint32_t device_id; /* Thin */
};
/*
 * Per-device properties.
 */
struct load_properties {
	int read_only;
	uint32_t major;
	uint32_t minor;

	uint32_t read_ahead;
	uint32_t read_ahead_flags;

	unsigned segment_count;
	unsigned size_changed;
	struct dm_list segs;		/* List head, initialised when a tree node is created */

	const char *new_name;

	/*
	 * If immediate_dev_node is set to 1, try to create the dev node
	 * as soon as possible (e.g. in preload stage even during traversal
	 * and processing of dm tree). This will also flush all stacked dev
	 * node operations, synchronizing with udev.
	 */
	unsigned immediate_dev_node;

	/*
	 * If the device size changed from zero and this is set,
	 * don't resume the device immediately, even if the device
	 * has parents.  This works provided the parents do not
	 * validate the device size and is required by pvmove to
	 * avoid starting the mirror resync operation too early.
	 */
	unsigned delay_resume_if_new;

	/* Send messages for this node in preload */
	unsigned send_messages;
};
/* Two of these used to join two nodes with uses and used_by. */
2005-11-09 14:10:50 +00:00
struct dm_tree_link {
struct dm_list list;
2005-11-09 14:10:50 +00:00
struct dm_tree_node *node;
};
2005-11-09 14:10:50 +00:00
struct dm_tree_node {
struct dm_tree *dtree;
2011-08-19 17:02:48 +00:00
const char *name;
const char *uuid;
struct dm_info info;
2011-08-19 17:02:48 +00:00
struct dm_list uses; /* Nodes this node uses */
struct dm_list used_by; /* Nodes that use this node */
2005-11-22 20:00:35 +00:00
int activation_priority; /* 0 gets activated first */
int implicit_deps; /* 1 device only implicitly referenced */
2005-11-22 20:00:35 +00:00
uint16_t udev_flags; /* Udev control flags */
void *context; /* External supplied context */
struct load_properties props; /* For creation/table (re)load */
/*
* If presuspend of child node is needed
* Note: only direct child is allowed
*/
struct dm_tree_node *presuspend_node;
/* Callback */
dm_node_callback_fn callback;
void *callback_data;
/*
* TODO:
* Add advanced code which tracks of send ioctls and their
* proper revert operation for more advanced recovery
* Current code serves mostly only to recovery when
* thin pool metadata check fails and command would
* have left active thin data and metadata subvolumes.
*/
struct dm_list activated; /* Head of activated nodes for preload revert */
struct dm_list activated_list; /* List of activated nodes for preload revert */
};
2005-11-09 14:10:50 +00:00
struct dm_tree {
2005-10-16 22:57:20 +00:00
struct dm_pool *mem;
struct dm_hash_table *devs;
struct dm_hash_table *uuids;
2005-11-09 14:10:50 +00:00
struct dm_tree_node root;
2006-01-30 23:36:04 +00:00
int skip_lockfs; /* 1 skips lockfs (for non-snapshots) */
int no_flush; /* 1 sets noflush (mirrors/multipath) */
int retry_remove; /* 1 retries remove if not successful */
uint32_t cookie;
const char **optional_uuid_suffixes; /* uuid suffixes ignored when matching */
};
/*
* Tree functions.
*/
2005-11-09 14:10:50 +00:00
struct dm_tree *dm_tree_create(void)
{
struct dm_pool *dmem;
2005-11-09 14:10:50 +00:00
struct dm_tree *dtree;
if (!(dmem = dm_pool_create("dtree", 1024)) ||
!(dtree = dm_pool_zalloc(dmem, sizeof(*dtree)))) {
log_error("Failed to allocate dtree.");
if (dmem)
dm_pool_destroy(dmem);
return NULL;
}
2005-11-09 14:10:50 +00:00
dtree->root.dtree = dtree;
dm_list_init(&dtree->root.uses);
dm_list_init(&dtree->root.used_by);
dm_list_init(&dtree->root.activated);
2006-01-30 23:36:04 +00:00
dtree->skip_lockfs = 0;
2007-01-09 19:44:07 +00:00
dtree->no_flush = 0;
dtree->mem = dmem;
dtree->optional_uuid_suffixes = NULL;
2005-11-09 14:10:50 +00:00
if (!(dtree->devs = dm_hash_create(8))) {
log_error("dtree hash creation failed");
dm_pool_destroy(dtree->mem);
return NULL;
}
2005-11-09 14:10:50 +00:00
if (!(dtree->uuids = dm_hash_create(32))) {
log_error("dtree uuid hash creation failed");
dm_hash_destroy(dtree->devs);
dm_pool_destroy(dtree->mem);
return NULL;
}
2005-11-09 14:10:50 +00:00
return dtree;
}
2005-11-09 14:10:50 +00:00
/*
 * Release a tree created by dm_tree_create().  A NULL tree is ignored.
 *
 * The pool is destroyed last because the tree structure itself was
 * allocated from it.
 */
void dm_tree_free(struct dm_tree *dtree)
{
	if (dtree) {
		dm_hash_destroy(dtree->uuids);
		dm_hash_destroy(dtree->devs);
		dm_pool_destroy(dtree->mem);
	}
}
void dm_tree_set_cookie(struct dm_tree_node *node, uint32_t cookie)
{
node->dtree->cookie = cookie;
}
uint32_t dm_tree_get_cookie(struct dm_tree_node *node)
{
return node->dtree->cookie;
}
void dm_tree_skip_lockfs(struct dm_tree_node *dnode)
{
dnode->dtree->skip_lockfs = 1;
}
void dm_tree_use_no_flush_suspend(struct dm_tree_node *dnode)
{
dnode->dtree->no_flush = 1;
}
void dm_tree_retry_remove(struct dm_tree_node *dnode)
{
dnode->dtree->retry_remove = 1;
}
/*
* Node functions.
*/
static int _nodes_are_linked(const struct dm_tree_node *parent,
const struct dm_tree_node *child)
{
2005-11-09 14:10:50 +00:00
struct dm_tree_link *dlink;
dm_list_iterate_items(dlink, &parent->uses)
if (dlink->node == child)
return 1;
return 0;
}
static int _link(struct dm_list *list, struct dm_tree_node *node)
{
2005-11-09 14:10:50 +00:00
struct dm_tree_link *dlink;
2005-11-09 14:10:50 +00:00
if (!(dlink = dm_pool_alloc(node->dtree->mem, sizeof(*dlink)))) {
log_error("dtree link allocation failed");
return 0;
}
dlink->node = node;
dm_list_add(list, &dlink->list);
return 1;
}
2005-11-09 14:10:50 +00:00
/*
 * Link 'parent' and 'child' in both directions: 'child' goes on the
 * parent's 'uses' list and 'parent' on the child's 'used_by' list.
 *
 * Already-linked nodes are left alone.  Returns 1 on success (or if
 * already linked), 0 if either link entry could not be added.
 */
static int _link_nodes(struct dm_tree_node *parent,
		       struct dm_tree_node *child)
{
	if (_nodes_are_linked(parent, child))
		return 1;

	/* The second link is only attempted when the first one succeeded. */
	return _link(&parent->uses, child) &&
	       _link(&child->used_by, parent);
}
static void _unlink(struct dm_list *list, struct dm_tree_node *node)
{
2005-11-09 14:10:50 +00:00
struct dm_tree_link *dlink;
dm_list_iterate_items(dlink, list)
if (dlink->node == node) {
dm_list_del(&dlink->list);
break;
}
}
2005-11-09 14:10:50 +00:00
/*
 * Undo _link_nodes(): drop both directions of the parent/child link.
 * Does nothing when the nodes are not linked.
 */
static void _unlink_nodes(struct dm_tree_node *parent,
			  struct dm_tree_node *child)
{
	if (_nodes_are_linked(parent, child)) {
		_unlink(&parent->uses, child);
		_unlink(&child->used_by, parent);
	}
}
2005-11-09 14:10:50 +00:00
static int _add_to_toplevel(struct dm_tree_node *node)
{
2005-11-09 14:10:50 +00:00
return _link_nodes(&node->dtree->root, node);
}
2005-11-09 14:10:50 +00:00
static void _remove_from_toplevel(struct dm_tree_node *node)
{
2009-12-11 13:16:37 +00:00
_unlink_nodes(&node->dtree->root, node);
}
2005-11-09 14:10:50 +00:00
static int _add_to_bottomlevel(struct dm_tree_node *node)
{
2005-11-09 14:10:50 +00:00
return _link_nodes(node, &node->dtree->root);
}
2005-11-09 14:10:50 +00:00
static void _remove_from_bottomlevel(struct dm_tree_node *node)
{
2009-12-11 13:16:37 +00:00
_unlink_nodes(node, &node->dtree->root);
}
2005-11-09 14:10:50 +00:00
/*
 * Link 'parent' and 'child', keeping the root node's links consistent.
 *
 * - If 'parent' is the root and 'child' already has a parent, nothing
 *   is linked; otherwise a non-root parent displaces the child's
 *   top-level link to the root.
 * - The mirror-image rule applies when 'child' is the root.
 *
 * Returns 1 on success (including the "nothing to do" cases), 0 if
 * _link_nodes() fails.
 */
static int _link_tree_nodes(struct dm_tree_node *parent, struct dm_tree_node *child)
{
	/* Don't link to root node if child already has a parent */
	if (parent != &parent->dtree->root)
		_remove_from_toplevel(child);
	else if (dm_tree_node_num_children(child, 1))
		return 1;

	/* Don't link the root node below a parent that already has children */
	if (child != &child->dtree->root)
		_remove_from_bottomlevel(parent);
	else if (dm_tree_node_num_children(parent, 0))
		return 1;

	return _link_nodes(parent, child);
}
2005-11-09 14:10:50 +00:00
static struct dm_tree_node *_create_dm_tree_node(struct dm_tree *dtree,
const char *name,
const char *uuid,
struct dm_info *info,
void *context,
uint16_t udev_flags)
{
2005-11-09 14:10:50 +00:00
struct dm_tree_node *node;
2012-06-21 12:55:30 +02:00
dev_t dev;
2005-11-09 14:10:50 +00:00
if (!(node = dm_pool_zalloc(dtree->mem, sizeof(*node)))) {
log_error("_create_dm_tree_node alloc failed");
return NULL;
}
2005-11-09 14:10:50 +00:00
node->dtree = dtree;
node->name = name;
node->uuid = uuid;
node->info = *info;
node->context = context;
node->udev_flags = udev_flags;
dm_list_init(&node->uses);
dm_list_init(&node->used_by);
dm_list_init(&node->activated);
dm_list_init(&node->props.segs);
dev = MKDEV((dev_t)info->major, info->minor);
2005-11-09 14:10:50 +00:00
if (!dm_hash_insert_binary(dtree->devs, (const char *) &dev,
sizeof(dev), node)) {
2005-11-09 14:10:50 +00:00
log_error("dtree node hash insertion failed");
dm_pool_free(dtree->mem, node);
return NULL;
}
if (uuid && *uuid &&
2005-11-09 14:10:50 +00:00
!dm_hash_insert(dtree->uuids, uuid, node)) {
log_error("dtree uuid hash insertion failed");
dm_hash_remove_binary(dtree->devs, (const char *) &dev,
sizeof(dev));
2005-11-09 14:10:50 +00:00
dm_pool_free(dtree->mem, node);
return NULL;
}
return node;
}
2005-11-09 14:10:50 +00:00
static struct dm_tree_node *_find_dm_tree_node(struct dm_tree *dtree,
uint32_t major, uint32_t minor)
{
dev_t dev = MKDEV((dev_t)major, minor);
2005-11-09 14:10:50 +00:00
return dm_hash_lookup_binary(dtree->devs, (const char *) &dev,
2014-02-14 20:46:55 +01:00
sizeof(dev));
}
/*
 * Register the list of uuid suffixes that may be ignored when matching
 * uuids in this tree.
 *
 * The pointer is stored as-is (no copy is made).  The uuid lookup code
 * walks the array until it meets a NULL entry, so the list must be
 * NULL-terminated.
 */
void dm_tree_set_optional_uuid_suffixes(struct dm_tree *dtree,
					const char **optional_uuid_suffixes)
{
	dtree->optional_uuid_suffixes = optional_uuid_suffixes;
}
2005-11-09 14:10:50 +00:00
/*
 * Look up a node by uuid.
 *
 * Three lookups are tried in order:
 *   1. the uuid exactly as given;
 *   2. if the tree has optional suffixes registered and the text after the
 *      last '-' equals one of them, the uuid with that "-suffix" removed;
 *   3. if the uuid starts with the default uuid prefix, the uuid with
 *      that prefix removed.
 *
 * Returns the matching node or NULL.
 */
static struct dm_tree_node *_find_dm_tree_node_by_uuid(struct dm_tree *dtree,
							const char *uuid)
{
	struct dm_tree_node *node;
	const char *default_uuid_prefix;
	size_t default_uuid_prefix_len;
	const char *suffix, *suffix_position;
	char uuid_without_suffix[DM_UUID_LEN];
	size_t stem_len;
	unsigned i = 0;
	const char **suffix_list = dtree->optional_uuid_suffixes;

	if ((node = dm_hash_lookup(dtree->uuids, uuid))) {
		log_debug("Matched uuid %s in deptree.", uuid);
		return node;
	}

	default_uuid_prefix = dm_uuid_prefix();
	default_uuid_prefix_len = strlen(default_uuid_prefix);

	/* strrchr() is the ISO C equivalent of the legacy rindex(). */
	if (suffix_list && (suffix_position = strrchr(uuid, '-'))) {
		while ((suffix = suffix_list[i++])) {
			if (strcmp(suffix_position + 1, suffix))
				continue;

			/*
			 * Copy only the part in front of "-suffix".  A stem
			 * that does not fit the local buffer cannot be
			 * looked up this way; writing the terminator at
			 * that index would overrun the buffer, so skip it.
			 */
			stem_len = (size_t) (suffix_position - uuid);
			if (stem_len >= sizeof(uuid_without_suffix))
				break;

			memcpy(uuid_without_suffix, uuid, stem_len);
			uuid_without_suffix[stem_len] = '\0';

			if ((node = dm_hash_lookup(dtree->uuids, uuid_without_suffix))) {
				log_debug("Matched uuid %s (missing suffix -%s) in deptree.", uuid_without_suffix, suffix);
				return node;
			}

			/* Only one suffix can match the tail; stop searching. */
			break;
		}
	}

	if (strncmp(uuid, default_uuid_prefix, default_uuid_prefix_len))
		return NULL;

	if ((node = dm_hash_lookup(dtree->uuids, uuid + default_uuid_prefix_len))) {
		log_debug("Matched uuid %s (missing prefix) in deptree.", uuid + default_uuid_prefix_len);
		return node;
	}

	log_debug("Not matched uuid %s in deptree.", uuid + default_uuid_prefix_len);

	return NULL;
}
/*
 * Set the udev flags of 'dnode'.
 *
 * When the new value differs from the current one the change is
 * reported through log_debug_activation() before it is stored.
 */
void dm_tree_node_set_udev_flags(struct dm_tree_node *dnode, uint16_t udev_flags)
{
	const uint16_t previous = dnode->udev_flags;

	if (previous != udev_flags)
		log_debug_activation("Resetting %s (%" PRIu32 ":%" PRIu32
				     ") udev_flags from 0x%x to 0x%x",
				     dnode->name,
				     dnode->info.major, dnode->info.minor,
				     previous, udev_flags);

	dnode->udev_flags = udev_flags;
}
void dm_tree_node_set_read_ahead(struct dm_tree_node *dnode,
uint32_t read_ahead,
uint32_t read_ahead_flags)
{
dnode->props.read_ahead = read_ahead;
dnode->props.read_ahead_flags = read_ahead_flags;
}
/*
 * Remember 'presuspend_node' as the presuspend node of 'node'.
 * Only the pointer is stored; no validation is done here.
 */
void dm_tree_node_set_presuspend_node(struct dm_tree_node *node,
				      struct dm_tree_node *presuspend_node)
{
	node->presuspend_node = presuspend_node;
}
/*
 * Return the node's name, or an empty string when info.exists is not set.
 */
const char *dm_tree_node_get_name(const struct dm_tree_node *node)
{
	if (!node->info.exists)
		return "";

	return node->name;
}
/*
 * Return the node's uuid, or an empty string when info.exists is not set.
 */
const char *dm_tree_node_get_uuid(const struct dm_tree_node *node)
{
	if (!node->info.exists)
		return "";

	return node->uuid;
}
/*
 * Return a pointer to the dm_info embedded in 'node'.
 * The pointer refers to storage inside the node, not a copy.
 */
const struct dm_info *dm_tree_node_get_info(const struct dm_tree_node *node)
{
	const struct dm_info *dinfo = &node->info;

	return dinfo;
}
/*
 * Return the externally supplied context pointer stored in 'node'.
 */
void *dm_tree_node_get_context(const struct dm_tree_node *node)
{
	void *ctx = node->context;

	return ctx;
}
int dm_tree_node_size_changed(const struct dm_tree_node *dnode)
{
return dnode->props.size_changed;
}
/*
 * Count the nodes linked to 'node' in one direction.
 *
 * With 'inverted' set the 'used_by' list is counted, otherwise the
 * 'uses' list.  A link to the tree's root node in the chosen direction
 * means there are no real entries, so 0 is returned.
 */
int dm_tree_node_num_children(const struct dm_tree_node *node, uint32_t inverted)
{
	const struct dm_tree_node *root = &node->dtree->root;

	if (inverted) {
		if (_nodes_are_linked(root, node))
			return 0;

		return dm_list_size(&node->used_by);
	}

	if (_nodes_are_linked(node, root))
		return 0;

	return dm_list_size(&node->uses);
}
/*
 * Check whether 'uuid' starts with 'uuid_prefix'.
 *
 * Returns 1 if no prefix supplied, or if the first uuid_prefix_len
 * bytes of 'uuid' equal the prefix.  For transition purposes it also
 * returns 1 when 'uuid' lacks the default uuid prefix but otherwise
 * matches a 'uuid_prefix' that begins with it.  Returns 0 otherwise.
 */
static int _uuid_prefix_matches(const char *uuid, const char *uuid_prefix, size_t uuid_prefix_len)
{
	const char *dflt_prefix = dm_uuid_prefix();
	size_t dflt_prefix_len = strlen(dflt_prefix);

	if (!uuid_prefix || !strncmp(uuid, uuid_prefix, uuid_prefix_len))
		return 1;

	/* Handle transition: active device uuids might be missing the prefix */
	if (uuid_prefix_len <= 4 ||
	    /* uuid does carry the default prefix, so nothing is missing */
	    !strncmp(uuid, dflt_prefix, dflt_prefix_len) ||
	    /* requested prefix does not begin with the default prefix */
	    strncmp(uuid_prefix, dflt_prefix, dflt_prefix_len))
		return 0;

	/* Compare against the requested prefix with the default prefix stripped. */
	return !strncmp(uuid, uuid_prefix + dflt_prefix_len,
			uuid_prefix_len - dflt_prefix_len);
}
/*
* Returns 1 if no children.
*/
static int _children_suspended(struct dm_tree_node *node,
uint32_t inverted,
const char *uuid_prefix,
size_t uuid_prefix_len)
{
struct dm_list *list;
struct dm_tree_link *dlink;
const struct dm_info *dinfo;
const char *uuid;
if (inverted) {
if (_nodes_are_linked(&node->dtree->root, node))
return 1;
list = &node->used_by;
} else {
if (_nodes_are_linked(node, &node->dtree->root))
return 1;
list = &node->uses;
}
dm_list_iterate_items(dlink, list) {
if (!(uuid = dm_tree_node_get_uuid(dlink->node))) {
stack;
continue;
}
/* Ignore if it doesn't belong to this VG */
if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len))
continue;
/* Ignore if parent node wants to presuspend this node */
if (dlink->node->presuspend_node == node)
continue;
if (!(dinfo = dm_tree_node_get_info(dlink->node)))