1
0
mirror of git://sourceware.org/git/lvm2.git synced 2025-01-06 17:18:29 +03:00
lvm2/lib/metadata/metadata.c

4018 lines
101 KiB
C
Raw Normal View History

2001-09-25 16:49:28 +04:00
/*
2008-01-30 17:00:02 +03:00
* Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved.
2001-09-25 16:49:28 +04:00
*
2004-03-30 23:35:44 +04:00
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License v.2.1.
2004-03-30 23:35:44 +04:00
*
* You should have received a copy of the GNU Lesser General Public License
2004-03-30 23:35:44 +04:00
* along with this program; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
2001-09-25 16:49:28 +04:00
*/
2002-11-18 17:04:08 +03:00
#include "lib.h"
#include "device.h"
2001-10-01 19:14:39 +04:00
#include "metadata.h"
#include "toolcontext.h"
#include "lvm-string.h"
#include "lvm-file.h"
#include "lvmcache.h"
#include "memlock.h"
2005-04-20 00:52:35 +04:00
#include "str_list.h"
#include "pv_alloc.h"
#include "segtype.h"
#include "activate.h"
#include "display.h"
#include "locking.h"
#include "archiver.h"
#include "defaults.h"
#include "filter-persistent.h"
2001-09-25 16:49:28 +04:00
#include <math.h>
2006-08-17 23:53:36 +04:00
#include <sys/param.h>
2008-01-30 17:00:02 +03:00
/*
 * Forward declarations of file-local PV lookup helpers.  Both are static,
 * so their definitions are expected elsewhere in this translation unit.
 */
static struct physical_volume *_pv_read(struct cmd_context *cmd,
struct dm_pool *pvmem,
const char *pv_name,
struct dm_list *mdas,
uint64_t *label_sector,
int warnings, int scan_label_only);
static struct physical_volume *_find_pv_by_name(struct cmd_context *cmd,
const char *pv_name);
2008-03-14 01:51:24 +03:00
/*
 * Forward declarations of file-local helpers used before their definitions:
 * PV lookup within a VG (by name and by id) and the VG status-bit check.
 */
static struct pv_list *_find_pv_in_vg(const struct volume_group *vg,
const char *pv_name);
static struct pv_list *_find_pv_in_vg_by_uuid(const struct volume_group *vg,
const struct id *id);
static uint32_t _vg_bad_status_bits(const struct volume_group *vg,
uint64_t status);
Change vg_create() to take only minimal parameters and obtain a lock. vg_t *vg_create(struct cmd_context *cmd, const char *vg_name); This is the first step towards the API called to create a VG. Call vg_lock_newname() inside this function. Use _vg_make_handle() where possible. Now we have 2 ways to construct a volume group: 1) vg_read: Used when constructing an existing VG from disks 2) vg_create: Used when constructing a new VG Both of these interfaces obtain a lock, and return a vg_t *. The usage of _vg_make_handle() inside vg_create() doesn't fit perfectly but it's ok for now. Needs some cleanup though and I've noted "FIXME" in the code. Add the new vg_create() plus vg 'set' functions for non-default VG parameters in the following tools: - vgcreate: Fairly straightforward refactoring. We just moved vg_lock_newname inside vg_create so we check the return via vg_read_error. - vgsplit: The refactoring here is a bit more tricky. Originally we called vg_lock_newname and depending on the error code, we either read the existing vg or created the new one. Now vg_create() calls vg_lock_newname, so we first try to create the VG. If this fails with FAILED_EXIST, we can then do the vg_read. If the create succeeds, we check the input parameters and set any new values on the VG. TODO in future patches: 1. The VG_ORPHAN lock needs some thought. We may want to treat this as any other VG, and require the application to obtain a handle and pass it to other API calls (for example, vg_extend). Or, we may find that hiding the VG_ORPHAN lock inside other APIs is the way to go. I thought of placing the VG_ORPHAN lock inside vg_create() and tying it to the vg handle, but was not certain this was the right approach. 2. Cleanup error paths. Integrate vg_read_error() with vg_create and vg_read* error codes and/or the new error APIs. Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2009-07-09 14:09:33 +04:00
/*
 * printf-style confirmation prompt.  It takes two string arguments: the
 * physical volume name followed by the volume group name.
 */
const char _really_init[] =
"Really INITIALIZE physical volume \"%s\" of volume group \"%s\" [y/n]? ";
/*
 * Tell whether a candidate alignment should be considered in place of the
 * default one.
 *
 * Returns 1 when 'data_alignment' is non-zero and 'default_pe_align' is not
 * an exact multiple of it, 0 otherwise.
 */
static int _alignment_overrides_default(unsigned long data_alignment,
					unsigned long default_pe_align)
{
	/* No candidate at all: nothing to override with (also avoids % 0). */
	if (!data_alignment)
		return 0;

	return (default_pe_align % data_alignment) != 0;
}
unsigned long set_pe_align(struct physical_volume *pv, unsigned long data_alignment)
2006-08-17 23:30:59 +04:00
{
unsigned long default_pe_align, temp_pe_align;
2008-09-19 09:19:09 +04:00
if (pv->pe_align)
goto out;
if (data_alignment) {
/* Always use specified data_alignment */
pv->pe_align = data_alignment;
goto out;
}
default_pe_align = find_config_tree_int(pv->fmt->cmd,
"devices/default_data_alignment",
DEFAULT_DATA_ALIGNMENT);
if (default_pe_align)
/* align on 1 MiB multiple */
default_pe_align *= DEFAULT_PE_ALIGN;
2009-10-06 20:00:38 +04:00
else
/* align on 64 KiB multiple (old default) */
default_pe_align = DEFAULT_PE_ALIGN_OLD;
pv->pe_align = MAX((default_pe_align << SECTOR_SHIFT),
lvm_getpagesize()) >> SECTOR_SHIFT;
2008-09-19 09:19:09 +04:00
if (!pv->dev)
goto out;
/*
* Align to stripe-width of underlying md device if present
*/
if (find_config_tree_bool(pv->fmt->cmd, "devices/md_chunk_alignment",
DEFAULT_MD_CHUNK_ALIGNMENT)) {
temp_pe_align = dev_md_stripe_width(pv->fmt->cmd->sysfs_dir, pv->dev);
if (_alignment_overrides_default(temp_pe_align, default_pe_align))
pv->pe_align = MAX(pv->pe_align, temp_pe_align);
}
2008-09-19 09:19:09 +04:00
/*
* Align to topology's minimum_io_size or optimal_io_size if present
* - minimum_io_size - the smallest request the device can perform
* w/o incurring a read-modify-write penalty (e.g. MD's chunk size)
* - optimal_io_size - the device's preferred unit of receiving I/O
* (e.g. MD's stripe width)
*/
if (find_config_tree_bool(pv->fmt->cmd,
"devices/data_alignment_detection",
DEFAULT_DATA_ALIGNMENT_DETECTION)) {
temp_pe_align = dev_minimum_io_size(pv->fmt->cmd->sysfs_dir, pv->dev);
if (_alignment_overrides_default(temp_pe_align, default_pe_align))
pv->pe_align = MAX(pv->pe_align, temp_pe_align);
temp_pe_align = dev_optimal_io_size(pv->fmt->cmd->sysfs_dir, pv->dev);
if (_alignment_overrides_default(temp_pe_align, default_pe_align))
pv->pe_align = MAX(pv->pe_align, temp_pe_align);
}
out:
log_very_verbose("%s: Setting PE alignment to %lu sectors.",
dev_name(pv->dev), pv->pe_align);
return pv->pe_align;
2006-08-17 23:30:59 +04:00
}
/*
 * Determine the PE alignment offset of 'pv' and record it in
 * pv->pe_align_offset.
 *
 * A value already stored in the PV is kept; otherwise a non-zero
 * 'data_alignment_offset' is used as given; otherwise, if the PV has a device
 * and "devices/data_alignment_offset_detection" is enabled, the offset
 * reported by dev_alignment_offset() is used (negative results count as 0).
 *
 * Returns the resulting pv->pe_align_offset (logged as a number of sectors).
 */
unsigned long set_pe_align_offset(struct physical_volume *pv,
				  unsigned long data_alignment_offset)
{
	int detected;

	if (pv->pe_align_offset || data_alignment_offset) {
		/* An existing setting wins; failing that, honour the request. */
		if (!pv->pe_align_offset)
			pv->pe_align_offset = data_alignment_offset;
		goto done;
	}

	if (!pv->dev)
		goto done;

	if (find_config_tree_bool(pv->fmt->cmd,
				  "devices/data_alignment_offset_detection",
				  DEFAULT_DATA_ALIGNMENT_OFFSET_DETECTION)) {
		detected = dev_alignment_offset(pv->fmt->cmd->sysfs_dir,
						pv->dev);

		/* A -1 alignment_offset means the device is misaligned. */
		if (detected < 0)
			detected = 0;

		pv->pe_align_offset = MAX(pv->pe_align_offset, detected);
	}

done:
	log_very_verbose("%s: Setting PE alignment offset to %lu sectors.",
			 dev_name(pv->dev), pv->pe_align_offset);

	return pv->pe_align_offset;
}
/*
 * Link a pv_list entry into vg->pvs, bump the VG's PV count and point the
 * PV back at its new volume group.
 */
void add_pvl_to_vgs(struct volume_group *vg, struct pv_list *pvl)
{
	pvl->pv->vg = vg;
	vg->pv_count++;
	dm_list_add(&vg->pvs, &pvl->list);
}
/*
 * Unlink a pv_list entry from whatever list it is on, drop the VG's PV
 * count and clear the PV's back-pointer (the PV becomes an orphan).
 */
void del_pvl_from_vgs(struct volume_group *vg, struct pv_list *pvl)
{
	dm_list_del(&pvl->list);
	vg->pv_count--;

	/* No owning VG any more: orphan. */
	pvl->pv->vg = NULL;
}
/**
* add_pv_to_vg - Add a physical volume to a volume group
* @vg - volume group to add to
* @pv_name - name of the pv (to be removed)
* @pv - physical volume to add to volume group
*
* Returns:
* 0 - failure
* 1 - success
* FIXME: remove pv_name - obtain safely from pv
*/
int add_pv_to_vg(struct volume_group *vg, const char *pv_name,
struct physical_volume *pv)
2001-10-12 18:25:53 +04:00
{
2001-10-15 22:39:40 +04:00
struct pv_list *pvl;
struct format_instance *fid = vg->fid;
struct dm_pool *mem = vg->vgmem;
char uuid[64] __attribute__((aligned(8)));
struct dm_list *mdas;
2001-10-12 18:25:53 +04:00
2001-10-15 22:39:40 +04:00
log_verbose("Adding physical volume '%s' to volume group '%s'",
pv_name, vg->name);
2001-10-15 22:39:40 +04:00
if (!(pvl = dm_pool_zalloc(mem, sizeof(*pvl)))) {
2001-10-15 22:39:40 +04:00
log_error("pv_list allocation for '%s' failed", pv_name);
2001-10-12 18:25:53 +04:00
return 0;
}
if (!is_orphan_vg(pv->vg_name)) {
2001-10-15 22:39:40 +04:00
log_error("Physical volume '%s' is already in volume group "
"'%s'", pv_name, pv->vg_name);
return 0;
}
2001-10-12 18:25:53 +04:00
2002-11-18 17:04:08 +03:00
if (pv->fmt != fid->fmt) {
log_error("Physical volume %s is of different format type (%s)",
pv_name, pv->fmt->name);
return 0;
}
/* Ensure PV doesn't depend on another PV already in the VG */
if (pv_uses_vg(pv, vg)) {
log_error("Physical volume %s might be constructed from same "
"volume group %s", pv_name, vg->name);
return 0;
}
if (!(pv->vg_name = dm_pool_strdup(mem, vg->name))) {
2001-10-15 22:39:40 +04:00
log_error("vg->name allocation failed for '%s'", pv_name);
2001-10-12 18:25:53 +04:00
return 0;
}
memcpy(&pv->vgid, &vg->id, sizeof(vg->id));
2001-10-16 00:29:15 +04:00
/* Units of 512-byte sectors */
2001-10-12 18:25:53 +04:00
pv->pe_size = vg->extent_size;
/*
* pe_count must always be calculated by pv_setup
2001-10-12 18:25:53 +04:00
*/
pv->pe_alloc_count = 0;
2001-10-12 18:25:53 +04:00
/*
* FIXME: this does not work entirely correctly in the case where a PV
* has 2 mdas and only one is ignored; ideally all non-ignored mdas
* should be placed on metadata_areas list and ignored on the
* metadata_areas_ignored list; however this requires another
* fairly complex refactoring to remove the 'mdas' parameter from both
* pv_setup and pv_write. For now, we only put ignored mdas on the
* metadata_areas_ignored list if all mdas in the PV are ignored;
* otherwise, we use the non-ignored list.
*/
if (!pv_mda_used_count(pv))
mdas = &fid->metadata_areas_ignored;
else
mdas = &fid->metadata_areas_in_use;
if (!fid->fmt->ops->pv_setup(fid->fmt, UINT64_C(0), 0,
vg->extent_size, 0, 0, 0UL, UINT64_C(0),
0, mdas, pv, vg)) {
2002-01-28 00:30:47 +03:00
log_error("Format-specific setup of physical volume '%s' "
2001-10-15 22:39:40 +04:00
"failed.", pv_name);
return 0;
}
if (_find_pv_in_vg(vg, pv_name) ||
_find_pv_in_vg_by_uuid(vg, &pv->id)) {
if (!id_write_format(&pv->id, uuid, sizeof(uuid))) {
stack;
uuid[0] = '\0';
}
log_error("Physical volume '%s (%s)' listed more than once.",
pv_name, uuid);
2001-10-12 18:25:53 +04:00
return 0;
}
if (vg->pv_count && (vg->pv_count == vg->max_pv)) {
2001-10-15 22:39:40 +04:00
log_error("No space for '%s' - volume group '%s' "
"holds max %d physical volume(s).", pv_name,
vg->name, vg->max_pv);
return 0;
}
2008-01-30 16:19:47 +03:00
if (!alloc_pv_segment_whole_pv(mem, pv))
return_0;
2001-10-15 22:39:40 +04:00
if ((uint64_t) vg->extent_count + pv->pe_count > UINT32_MAX) {
log_error("Unable to add %s to %s: new extent count (%"
PRIu64 ") exceeds limit (%" PRIu32 ").",
pv_name, vg->name,
(uint64_t) vg->extent_count + pv->pe_count,
UINT32_MAX);
return 0;
}
pvl->pv = pv;
add_pvl_to_vgs(vg, pvl);
2001-11-06 22:02:26 +03:00
vg->extent_count += pv->pe_count;
vg->free_count += pv->pe_count;
2001-10-12 18:25:53 +04:00
return 1;
}
static int _copy_pv(struct dm_pool *pvmem,
struct physical_volume *pv_to,
2005-04-20 00:44:21 +04:00
struct physical_volume *pv_from)
2005-04-18 03:59:04 +04:00
{
memcpy(pv_to, pv_from, sizeof(*pv_to));
2005-04-20 00:52:35 +04:00
if (!(pv_to->vg_name = dm_pool_strdup(pvmem, pv_from->vg_name)))
return_0;
if (!str_list_dup(pvmem, &pv_to->tags, &pv_from->tags))
return_0;
2005-04-20 00:52:35 +04:00
if (!peg_dup(pvmem, &pv_to->segments, &pv_from->segments))
2008-01-30 16:19:47 +03:00
return_0;
2005-04-20 00:52:35 +04:00
return 1;
2005-04-18 03:59:04 +04:00
}
static struct pv_list *_copy_pvl(struct dm_pool *pvmem, struct pv_list *pvl_from)
{
struct pv_list *pvl_to = NULL;
if (!(pvl_to = dm_pool_zalloc(pvmem, sizeof(*pvl_to))))
return_NULL;
if (!(pvl_to->pv = dm_pool_alloc(pvmem, sizeof(*pvl_to->pv))))
goto_bad;
if(!_copy_pv(pvmem, pvl_to->pv, pvl_from->pv))
goto_bad;
return pvl_to;
bad:
dm_pool_free(pvmem, pvl_to);
return NULL;
}
2005-04-18 03:57:44 +04:00
int get_pv_from_vg_by_id(const struct format_type *fmt, const char *vg_name,
const char *vgid, const char *pvid,
struct physical_volume *pv)
2005-04-18 03:57:44 +04:00
{
struct volume_group *vg;
struct pv_list *pvl;
int r = 0, consistent = 0;
2005-04-18 03:57:44 +04:00
if (!(vg = vg_read_internal(fmt->cmd, vg_name, vgid, &consistent))) {
log_error("get_pv_from_vg_by_id: vg_read_internal failed to read VG %s",
2005-04-18 03:57:44 +04:00
vg_name);
return 0;
}
if (!consistent)
log_warn("WARNING: Volume group %s is not consistent",
vg_name);
2005-04-18 03:57:44 +04:00
dm_list_iterate_items(pvl, &vg->pvs) {
if (id_equal(&pvl->pv->id, (const struct id *) pvid)) {
if (!_copy_pv(fmt->cmd->mem, pv, pvl->pv)) {
log_error("internal PV duplication failed");
r = 0;
goto out;
}
r = 1;
goto out;
2005-04-18 03:57:44 +04:00
}
}
out:
vg_release(vg);
return r;
2005-04-18 03:57:44 +04:00
}
/*
 * Move the PV called 'pv_name' from 'vg_from' to 'vg_to', adjusting the
 * extent and free-extent counts of both volume groups.
 *
 * Returns 1 on success, 0 if the PV is not in 'vg_from' or either VG fails
 * the RESIZEABLE_VG status check.
 */
int move_pv(struct volume_group *vg_from, struct volume_group *vg_to,
	    const char *pv_name)
{
	struct pv_list *pvl;
	struct physical_volume *pv;
	uint32_t pe_total, pe_free;

	/* FIXME: handle tags */
	pvl = find_pv_in_vg(vg_from, pv_name);
	if (!pvl) {
		log_error("Physical volume %s not in volume group %s",
			  pv_name, vg_from->name);
		return 0;
	}

	if (_vg_bad_status_bits(vg_from, RESIZEABLE_VG) ||
	    _vg_bad_status_bits(vg_to, RESIZEABLE_VG))
		return 0;

	del_pvl_from_vgs(vg_from, pvl);
	add_pvl_to_vgs(vg_to, pvl);

	pv = pvl->pv;
	pe_total = pv_pe_count(pv);
	pe_free = pe_total - pv_pe_alloc_count(pv);

	vg_from->extent_count -= pe_total;
	vg_to->extent_count += pe_total;

	vg_from->free_count -= pe_free;
	vg_to->free_count += pe_free;

	return 1;
}
/*
 * Move every PV used by logical volume 'lv_name' from 'vg_from' to 'vg_to'.
 *
 * Each segment of the LV is walked; PV areas are moved with move_pv(), while
 * a segment's log LV and any LV areas are handled by recursing on that LV's
 * name.
 *
 * Returns 1 on success, 0 on the first failure.
 */
int move_pvs_used_by_lv(struct volume_group *vg_from,
			struct volume_group *vg_to,
			const char *lv_name)
{
	struct lv_list *lvl;
	struct lv_segment *seg;
	struct logical_volume *sub_lv;
	unsigned area;

	/* FIXME: handle tags */
	lvl = find_lv_in_vg(vg_from, lv_name);
	if (!lvl) {
		log_error("Logical volume %s not in volume group %s",
			  lv_name, vg_from->name);
		return 0;
	}

	if (_vg_bad_status_bits(vg_from, RESIZEABLE_VG) ||
	    _vg_bad_status_bits(vg_to, RESIZEABLE_VG))
		return 0;

	dm_list_iterate_items(seg, &lvl->lv->segments) {
		if (seg->log_lv &&
		    !move_pvs_used_by_lv(vg_from, vg_to, seg->log_lv->name))
			return_0;

		for (area = 0; area < seg->area_count; area++) {
			switch (seg_type(seg, area)) {
			case AREA_PV:
				if (!move_pv(vg_from, vg_to,
					     pv_dev_name(seg_pv(seg, area))))
					return_0;
				break;
			case AREA_LV:
				sub_lv = seg_lv(seg, area);
				if (!move_pvs_used_by_lv(vg_from, vg_to,
							 sub_lv->name))
					return_0;
				break;
			default:
				/* Other area types own no PVs to move. */
				break;
			}
		}
	}

	return 1;
}
static int validate_new_vg_name(struct cmd_context *cmd, const char *vg_name)
{
char vg_path[PATH_MAX];
if (!validate_name(vg_name))
2008-01-17 20:17:09 +03:00
return_0;
snprintf(vg_path, PATH_MAX, "%s%s", cmd->dev_dir, vg_name);
if (path_exists(vg_path)) {
log_error("%s: already exists in filesystem", vg_path);
return 0;
}
return 1;
}
/*
 * Validate the parameters of a volume group rename.
 *
 * The new name must leave room for the device directory prefix within
 * NAME_LEN, must be acceptable to validate_new_vg_name(), and must differ
 * from the old name.
 *
 * Returns 1 if the rename may proceed, 0 otherwise.
 */
int validate_vg_rename_params(struct cmd_context *cmd,
			      const char *vg_name_old,
			      const char *vg_name_new)
{
	size_t dev_dir_len = strlen(cmd->dev_dir);
	size_t max_len;

	/*
	 * Space left for the name once the device directory and two spare
	 * bytes are accounted for.  Clamp at zero: evaluating
	 * NAME_LEN - len - 2 in unsigned arithmetic would wrap to a huge
	 * value for a long device directory and disable the check.
	 */
	max_len = (dev_dir_len + 2 < (size_t) NAME_LEN) ?
		(size_t) NAME_LEN - dev_dir_len - 2 : 0;

	/* Check sanity of new name */
	if (strlen(vg_name_new) > max_len) {
		log_error("New volume group path exceeds maximum length "
			  "of %d!", (int) max_len);
		return 0;
	}

	if (!validate_new_vg_name(cmd, vg_name_new)) {
		log_error("New volume group name \"%s\" is invalid",
			  vg_name_new);
		return 0;
	}

	if (!strcmp(vg_name_old, vg_name_new)) {
		log_error("Old and new volume group names must differ");
		return 0;
	}

	return 1;
}
/*
 * Rename a volume group in memory.
 *
 * vg->old_name receives the previous name, vg->name a copy of 'new_name',
 * and every PV in the VG gets its own copy of 'new_name' as vg_name.  All
 * copies are allocated from vg->vgmem.
 *
 * Each copy is made before the field it replaces is touched, so a failed
 * allocation never leaves vg->name or a pv->vg_name set to NULL.  If an
 * allocation fails part-way through the PV list, the VG and the PVs already
 * visited keep the new name.
 *
 * 'cmd' is currently unused.
 *
 * Returns 1 on success, 0 on allocation failure.
 */
int vg_rename(struct cmd_context *cmd, struct volume_group *vg,
	      const char *new_name)
{
	struct dm_pool *mem = vg->vgmem;
	struct pv_list *pvl;
	char *name_copy;

	if (!(name_copy = dm_pool_strdup(mem, new_name))) {
		log_error("vg->name allocation failed for '%s'", new_name);
		return 0;
	}

	vg->old_name = vg->name;
	vg->name = name_copy;

	dm_list_iterate_items(pvl, &vg->pvs) {
		if (!(name_copy = dm_pool_strdup(mem, new_name))) {
			log_error("pv->vg_name allocation failed for '%s'",
				  pv_dev_name(pvl->pv));
			return 0;
		}
		pvl->pv->vg_name = name_copy;
	}

	return 1;
}
/*
 * Remove every logical volume in 'vg', together with its dependencies.
 *
 * The head of vg->lvs is fetched afresh on each pass and the loop ends once
 * dm_list_first() reports no entry.
 *
 * Returns 1 when all LVs were removed, 0 as soon as one removal fails.
 */
int remove_lvs_in_vg(struct cmd_context *cmd,
		     struct volume_group *vg,
		     force_t force)
{
	struct dm_list *head;

	for (head = dm_list_first(&vg->lvs); head;
	     head = dm_list_first(&vg->lvs)) {
		if (!lv_remove_with_dependencies(cmd,
						 dm_list_item(head, struct lv_list)->lv,
						 force, 0))
			return 0;
	}

	return 1;
}
/*
 * Check whether 'vg' may be removed and archive its metadata.
 *
 * The VG must have been read without error, have no missing PVs, pass the
 * EXPORTED_VG status check and contain no visible logical volumes.  The
 * metadata is archived as the last step.
 *
 * Returns 1 if removal may proceed, 0 otherwise.
 */
int vg_remove_check(struct volume_group *vg)
{
	unsigned visible_lvs;

	if (vg_read_error(vg) || vg_missing_pv_count(vg)) {
		log_error("Volume group \"%s\" not found, is inconsistent "
			  "or has PVs missing.", vg ? vg->name : "");
		log_error("Consider vgreduce --removemissing if metadata "
			  "is inconsistent.");
		return 0;
	}

	if (!vg_check_status(vg, EXPORTED_VG))
		return 0;

	if ((visible_lvs = vg_visible_lvs(vg))) {
		log_error("Volume group \"%s\" still contains %u "
			  "logical volume(s)", vg->name, visible_lvs);
		return 0;
	}

	return archive(vg) ? 1 : 0;
}
/*
 * Detach every PV from 'vg' and queue it on vg->removed_pvs.
 *
 * The "safe" iterator is required because each entry is unlinked from
 * vg->pvs while the list is being walked.
 */
void vg_remove_pvs(struct volume_group *vg)
{
	struct pv_list *cur, *next;

	dm_list_iterate_items_safe(cur, next, &vg->pvs) {
		del_pvl_from_vgs(vg, cur);
		dm_list_add(&vg->removed_pvs, &cur->list);
	}
}
/*
 * Remove a volume group.
 *
 * With the orphan lock held, the VG's metadata areas are removed and every
 * PV on vg->removed_pvs that is not missing is turned back into an orphan
 * and written out.  A failure on one PV is recorded but does not stop the
 * remaining PVs from being processed.  Finally the VG's backup is removed.
 *
 * Returns 1 if everything succeeded, 0 otherwise.
 */
int vg_remove(struct volume_group *vg)
{
	struct pv_list *pvl;
	struct physical_volume *pv;
	int ok = 1;

	if (!lock_vol(vg->cmd, VG_ORPHANS, LCK_VG_WRITE)) {
		log_error("Can't get lock for orphan PVs");
		return 0;
	}

	if (!vg_remove_mdas(vg)) {
		log_error("vg_remove_mdas %s failed", vg->name);
		ok = 0;
		goto unlock;
	}

	/* init physical volumes */
	dm_list_iterate_items(pvl, &vg->removed_pvs) {
		pv = pvl->pv;

		if (is_missing_pv(pv))
			continue;

		log_verbose("Removing physical volume \"%s\" from "
			    "volume group \"%s\"", pv_dev_name(pv), vg->name);

		pv->vg_name = vg->fid->fmt->orphan_vg_name;
		pv->status = ALLOCATABLE_PV;

		if (!dev_get_size(pv_dev(pv), &pv->size)) {
			log_error("%s: Couldn't get size.", pv_dev_name(pv));
			ok = 0;
		} else if (!pv_write(vg->cmd, pv, NULL, INT64_C(-1))) {
			/* FIXME Write to same sector label was read from */
			log_error("Failed to remove physical volume \"%s\""
				  " from volume group \"%s\"",
				  pv_dev_name(pv), vg->name);
			ok = 0;
		}
	}

	backup_remove(vg->cmd, vg->name);

	if (ok)
		log_print("Volume group \"%s\" successfully removed", vg->name);
	else
		log_error("Volume group \"%s\" not properly removed", vg->name);

unlock:
	unlock_vg(vg->cmd, VG_ORPHANS);

	return ok;
}
/*
 * Extend a VG by a single PV / device path
 *
 * Parameters:
 * - vg: handle of volume group to extend by 'pv_name'
 * - pv_name: device path of PV to add to VG
 * - pp: parameters to pass to implicit pvcreate; if NULL, do not pvcreate
 *
 * If 'pv_name' is not yet a PV it is created first, provided 'pp' was given.
 *
 * Returns 1 on success, 0 on failure.
 */
static int vg_extend_single_pv(struct volume_group *vg, char *pv_name,
			       struct pvcreate_params *pp)
{
	struct physical_volume *pv = pv_by_path(vg->fid->fmt->cmd, pv_name);

	if (!pv) {
		if (!pp) {
			log_error("%s not identified as an existing "
				  "physical volume", pv_name);
			return 0;
		}

		/* Not a PV yet: initialise it with the supplied parameters. */
		if (!(pv = pvcreate_single(vg->cmd, pv_name, pp)))
			return 0;
	}

	return add_pv_to_vg(vg, pv_name, pv) ? 1 : 0;
}
/*
* Extend a VG by a single PV / device path
*
* Parameters:
* - vg: handle of volume group to extend by 'pv_name'
* - pv_count: count of device paths of PVs
* - pv_names: device paths of PVs to add to VG
* - pp: parameters to pass to implicit pvcreate; if NULL, do not pvcreate
*
*/
int vg_extend(struct volume_group *vg, int pv_count, char **pv_names,
struct pvcreate_params *pp)
2001-10-16 02:04:27 +04:00
{
int i;
if (_vg_bad_status_bits(vg, RESIZEABLE_VG))
return 0;
2001-10-16 02:04:27 +04:00
/* attach each pv */
for (i = 0; i < pv_count; i++) {
unescape_colons_and_at_signs(pv_names[i], NULL, NULL);
if (!vg_extend_single_pv(vg, pv_names[i], pp))
goto bad;
}
2001-10-16 02:04:27 +04:00
2002-11-18 17:04:08 +03:00
/* FIXME Decide whether to initialise and add new mdahs to format instance */
2001-10-16 02:04:27 +04:00
return 1;
2007-07-12 08:12:04 +04:00
bad:
log_error("Unable to add physical volume '%s' to "
"volume group '%s'.", pv_names[i], vg->name);
return 0;
2001-10-16 02:04:27 +04:00
}
/*
 * Remove the PV called 'pv_name' from 'vg'.
 *
 * The VG must pass the RESIZEABLE_VG status check and is archived first.
 * The PV must belong to the VG and have no allocated extents.  On success
 * the VG's extent and free counts are reduced and the PV is moved onto
 * vg->removed_pvs.
 *
 * Returns 1 on success, 0 on failure.
 *
 * FIXME: use this inside vgreduce_single?
 */
int vg_reduce(struct volume_group *vg, char *pv_name)
{
	struct pv_list *pvl;
	struct physical_volume *pv;

	if (_vg_bad_status_bits(vg, RESIZEABLE_VG))
		return 0;

	if (!archive(vg))
		goto fail;

	/* remove each pv */
	pvl = find_pv_in_vg(vg, pv_name);
	if (!pvl) {
		log_error("Physical volume %s not in volume group %s.",
			  pv_name, vg->name);
		goto fail;
	}

	pv = pvl->pv;

	if (pv_pe_alloc_count(pv)) {
		log_error("Physical volume %s still in use.",
			  pv_name);
		goto fail;
	}

	if (!dev_get_size(pv_dev(pv), &pv->size)) {
		log_error("%s: Couldn't get size.", pv_name);
		goto fail;
	}

	vg->free_count -= pv_pe_count(pv) - pv_pe_alloc_count(pv);
	vg->extent_count -= pv_pe_count(pv);
	del_pvl_from_vgs(vg, pvl);

	/* add pv to the remove_pvs list */
	dm_list_add(&vg->removed_pvs, &pvl->list);

	return 1;

fail:
	log_error("Unable to remove physical volume '%s' from "
		  "volume group '%s'.", pv_name, vg->name);

	return 0;
}
/*
 * Add 'tag' to (add_tag != 0) or remove it from (add_tag == 0) the tag list
 * of logical volume 'lv'.
 *
 * The VG's metadata format must advertise FMT_TAGS.  An added tag is
 * duplicated into the VG's memory pool.
 *
 * Returns 1 on success, 0 on failure.
 */
int lv_change_tag(struct logical_volume *lv, const char *tag, int add_tag)
{
	char *tag_copy;

	if (!(lv->vg->fid->fmt->features & FMT_TAGS)) {
		log_error("Logical volume %s/%s does not support tags",
			  lv->vg->name, lv->name);
		return 0;
	}

	if (!add_tag) {
		if (str_list_del(&lv->tags, tag))
			return 1;

		log_error("Failed to remove tag %s from %s/%s",
			  tag, lv->vg->name, lv->name);
		return 0;
	}

	tag_copy = dm_pool_strdup(lv->vg->vgmem, tag);
	if (!tag_copy) {
		log_error("Failed to duplicate tag %s from %s/%s",
			  tag, lv->vg->name, lv->name);
		return 0;
	}

	if (!str_list_add(lv->vg->vgmem, &lv->tags, tag_copy)) {
		log_error("Failed to add tag %s to %s/%s",
			  tag, lv->vg->name, lv->name);
		return 0;
	}

	return 1;
}
/*
 * Add 'tag' to (add_tag != 0) or remove it from (add_tag == 0) the tag list
 * of volume group 'vg'.
 *
 * The VG's metadata format must advertise FMT_TAGS.  An added tag is
 * duplicated into the VG's memory pool.
 *
 * Returns 1 on success, 0 on failure.
 */
int vg_change_tag(struct volume_group *vg, const char *tag, int add_tag)
{
	char *tag_copy;

	if (!(vg->fid->fmt->features & FMT_TAGS)) {
		log_error("Volume group %s does not support tags", vg->name);
		return 0;
	}

	if (!add_tag) {
		if (str_list_del(&vg->tags, tag))
			return 1;

		log_error("Failed to remove tag %s from volume group "
			  "%s", tag, vg->name);
		return 0;
	}

	tag_copy = dm_pool_strdup(vg->vgmem, tag);
	if (!tag_copy) {
		log_error("Failed to duplicate tag %s from %s",
			  tag, vg->name);
		return 0;
	}

	if (!str_list_add(vg->vgmem, &vg->tags, tag_copy)) {
		log_error("Failed to add tag %s to volume group %s",
			  tag, vg->name);
		return 0;
	}

	return 1;
}
2001-11-12 18:10:01 +03:00
/*
 * Skip a leading 'dev_dir' prefix in 'vg_name'.
 *
 * Returns a pointer into 'vg_name': just past the prefix when 'vg_name'
 * starts with 'dev_dir', otherwise 'vg_name' itself.  Nothing is copied.
 */
const char *strip_dir(const char *vg_name, const char *dev_dir)
{
	const size_t prefix_len = strlen(dev_dir);

	return strncmp(vg_name, dev_dir, prefix_len) ? vg_name
						     : vg_name + prefix_len;
}
/*
 * Validate parameters to vg_create() before calling.
 *
 * Checks the VG name, refuses ALLOC_INHERIT as the allocation policy and a
 * zero extent size.  For formats without FMT_UNLIMITED_VOLS, a zero max_lv
 * or max_pv in 'vp' is replaced by 255, and values above 255 are rejected.
 *
 * NOTE the return convention, which is the reverse of most functions in
 * this file:
 *  0 - parameters are valid
 *  1 - parameters are invalid
 *
 * FIXME: Move inside vg_create library function.
 * FIXME: Change vgcreate_params struct to individual gets/sets
 */
int vgcreate_params_validate(struct cmd_context *cmd,
			     struct vgcreate_params *vp)
{
	if (!validate_new_vg_name(cmd, vp->vg_name)) {
		log_error("New volume group name \"%s\" is invalid",
			  vp->vg_name);
		return 1;
	}

	if (vp->alloc == ALLOC_INHERIT) {
		log_error("Volume Group allocation policy cannot inherit "
			  "from anything");
		return 1;
	}

	if (!vp->extent_size) {
		log_error("Physical extent size may not be zero");
		return 1;
	}

	/* Formats with unlimited volumes need no further checks. */
	if (cmd->fmt->features & FMT_UNLIMITED_VOLS)
		return 0;

	if (!vp->max_lv)
		vp->max_lv = 255;

	if (!vp->max_pv)
		vp->max_pv = 255;

	if (vp->max_lv > 255 || vp->max_pv > 255) {
		log_error("Number of volumes may not exceed 255");
		return 1;
	}

	return 0;
}
/*
 * Turn a struct volume_group pointer plus a failure code (zero for success)
 * into a volume group handle.
 *
 * When 'vg' is NULL, a zeroed volume_group is allocated from a freshly
 * created pool, which it then records as its vgmem.  In all cases 'failure'
 * is stored in vg->read_status.
 *
 * 'cmd' is currently unused.
 *
 * Returns the handle, or NULL if the allocation failed.
 */
static struct volume_group *_vg_make_handle(struct cmd_context *cmd,
					    struct volume_group *vg,
					    uint32_t failure)
{
	struct dm_pool *pool;

	if (!vg) {
		pool = dm_pool_create("lvm2 vg_handle", VG_MEMPOOL_CHUNK);
		if (pool)
			vg = dm_pool_zalloc(pool, sizeof(*vg));

		if (!vg) {
			log_error("Error allocating vg handle.");
			if (pool)
				dm_pool_destroy(pool);
			return_NULL;
		}

		vg->vgmem = pool;
	}

	vg->read_status = failure;

	return vg;
}
int lv_has_unknown_segments(const struct logical_volume *lv)
{
struct lv_segment *seg;
/* foreach segment */
dm_list_iterate_items(seg, &lv->segments)
if (seg_unknown(seg))
return 1;
return 0;
}
int vg_has_unknown_segments(const struct volume_group *vg)
{
struct lv_list *lvl;
/* foreach LV */
dm_list_iterate_items(lvl, &vg->lvs)
if (lv_has_unknown_segments(lvl->lv))
return 1;
return 0;
}
Change vg_create() to take only minimal parameters and obtain a lock. vg_t *vg_create(struct cmd_context *cmd, const char *vg_name); This is the first step towards the API called to create a VG. Call vg_lock_newname() inside this function. Use _vg_make_handle() where possible. Now we have 2 ways to construct a volume group: 1) vg_read: Used when constructing an existing VG from disks 2) vg_create: Used when constructing a new VG Both of these interfaces obtain a lock, and return a vg_t *. The usage of _vg_make_handle() inside vg_create() doesn't fit perfectly but it's ok for now. Needs some cleanup though and I've noted "FIXME" in the code. Add the new vg_create() plus vg 'set' functions for non-default VG parameters in the following tools: - vgcreate: Fairly straightforward refactoring. We just moved vg_lock_newname inside vg_create so we check the return via vg_read_error. - vgsplit: The refactoring here is a bit more tricky. Originally we called vg_lock_newname and depending on the error code, we either read the existing vg or created the new one. Now vg_create() calls vg_lock_newname, so we first try to create the VG. If this fails with FAILED_EXIST, we can then do the vg_read. If the create succeeds, we check the input parameters and set any new values on the VG. TODO in future patches: 1. The VG_ORPHAN lock needs some thought. We may want to treat this as any other VG, and require the application to obtain a handle and pass it to other API calls (for example, vg_extend). Or, we may find that hiding the VG_ORPHAN lock inside other APIs is the way to go. I thought of placing the VG_ORPHAN lock inside vg_create() and tying it to the vg handle, but was not certain this was the right approach. 2. Cleanup error paths. Integrate vg_read_error() with vg_create and vg_read* error codes and/or the new error APIs. Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2009-07-09 14:09:33 +04:00
/*
 * Create a VG with default parameters.
 * Returns:
 * - struct volume_group* with SUCCESS code: VG structure created
 * - NULL or struct volume_group* with FAILED_* code: error creating VG structure
 * Use vg_read_error() to determine success or failure.
 * FIXME: cleanup usage of _vg_make_handle()
 */
struct volume_group *vg_create(struct cmd_context *cmd, const char *vg_name)
2001-10-12 18:25:53 +04:00
{
struct volume_group *vg;
2002-11-18 17:04:08 +03:00
int consistent = 0;
struct dm_pool *mem;
Change vg_create() to take only minimal parameters and obtain a lock. vg_t *vg_create(struct cmd_context *cmd, const char *vg_name); This is the first step towards the API called to create a VG. Call vg_lock_newname() inside this function. Use _vg_make_handle() where possible. Now we have 2 ways to construct a volume group: 1) vg_read: Used when constructing an existing VG from disks 2) vg_create: Used when constructing a new VG Both of these interfaces obtain a lock, and return a vg_t *. The usage of _vg_make_handle() inside vg_create() doesn't fit perfectly but it's ok for now. Needs some cleanup though and I've noted "FIXME" in the code. Add the new vg_create() plus vg 'set' functions for non-default VG parameters in the following tools: - vgcreate: Fairly straightforward refactoring. We just moved vg_lock_newname inside vg_create so we check the return via vg_read_error. - vgsplit: The refactoring here is a bit more tricky. Originally we called vg_lock_newname and depending on the error code, we either read the existing vg or created the new one. Now vg_create() calls vg_lock_newname, so we first try to create the VG. If this fails with FAILED_EXIST, we can then do the vg_read. If the create succeeds, we check the input parameters and set any new values on the VG. TODO in future patches: 1. The VG_ORPHAN lock needs some thought. We may want to treat this as any other VG, and require the application to obtain a handle and pass it to other API calls (for example, vg_extend). Or, we may find that hiding the VG_ORPHAN lock inside other APIs is the way to go. I thought of placing the VG_ORPHAN lock inside vg_create() and tying it to the vg handle, but was not certain this was the right approach. 2. Cleanup error paths. Integrate vg_read_error() with vg_create and vg_read* error codes and/or the new error APIs. Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2009-07-09 14:09:33 +04:00
uint32_t rc;
if (!validate_name(vg_name)) {
log_error("Invalid vg name %s", vg_name);
/* FIXME: use _vg_make_handle() w/proper error code */
return NULL;
}
rc = vg_lock_newname(cmd, vg_name);
if (rc != SUCCESS)
/* NOTE: let caller decide - this may be check for existence */
return _vg_make_handle(cmd, NULL, rc);
2001-10-12 18:25:53 +04:00
/* FIXME: Is this vg_read_internal necessary? Move it inside
Change vg_create() to take only minimal parameters and obtain a lock. vg_t *vg_create(struct cmd_context *cmd, const char *vg_name); This is the first step towards the API called to create a VG. Call vg_lock_newname() inside this function. Use _vg_make_handle() where possible. Now we have 2 ways to construct a volume group: 1) vg_read: Used when constructing an existing VG from disks 2) vg_create: Used when constructing a new VG Both of these interfaces obtain a lock, and return a vg_t *. The usage of _vg_make_handle() inside vg_create() doesn't fit perfectly but it's ok for now. Needs some cleanup though and I've noted "FIXME" in the code. Add the new vg_create() plus vg 'set' functions for non-default VG parameters in the following tools: - vgcreate: Fairly straightforward refactoring. We just moved vg_lock_newname inside vg_create so we check the return via vg_read_error. - vgsplit: The refactoring here is a bit more tricky. Originally we called vg_lock_newname and depending on the error code, we either read the existing vg or created the new one. Now vg_create() calls vg_lock_newname, so we first try to create the VG. If this fails with FAILED_EXIST, we can then do the vg_read. If the create succeeds, we check the input parameters and set any new values on the VG. TODO in future patches: 1. The VG_ORPHAN lock needs some thought. We may want to treat this as any other VG, and require the application to obtain a handle and pass it to other API calls (for example, vg_extend). Or, we may find that hiding the VG_ORPHAN lock inside other APIs is the way to go. I thought of placing the VG_ORPHAN lock inside vg_create() and tying it to the vg handle, but was not certain this was the right approach. 2. Cleanup error paths. Integrate vg_read_error() with vg_create and vg_read* error codes and/or the new error APIs. Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2009-07-09 14:09:33 +04:00
vg_lock_newname? */
2001-10-12 18:25:53 +04:00
/* is this vg name already in use ? */
if ((vg = vg_read_internal(cmd, vg_name, NULL, &consistent))) {
Change vg_create() to take only minimal parameters and obtain a lock. vg_t *vg_create(struct cmd_context *cmd, const char *vg_name); This is the first step towards the API called to create a VG. Call vg_lock_newname() inside this function. Use _vg_make_handle() where possible. Now we have 2 ways to construct a volume group: 1) vg_read: Used when constructing an existing VG from disks 2) vg_create: Used when constructing a new VG Both of these interfaces obtain a lock, and return a vg_t *. The usage of _vg_make_handle() inside vg_create() doesn't fit perfectly but it's ok for now. Needs some cleanup though and I've noted "FIXME" in the code. Add the new vg_create() plus vg 'set' functions for non-default VG parameters in the following tools: - vgcreate: Fairly straightforward refactoring. We just moved vg_lock_newname inside vg_create so we check the return via vg_read_error. - vgsplit: The refactoring here is a bit more tricky. Originally we called vg_lock_newname and depending on the error code, we either read the existing vg or created the new one. Now vg_create() calls vg_lock_newname, so we first try to create the VG. If this fails with FAILED_EXIST, we can then do the vg_read. If the create succeeds, we check the input parameters and set any new values on the VG. TODO in future patches: 1. The VG_ORPHAN lock needs some thought. We may want to treat this as any other VG, and require the application to obtain a handle and pass it to other API calls (for example, vg_extend). Or, we may find that hiding the VG_ORPHAN lock inside other APIs is the way to go. I thought of placing the VG_ORPHAN lock inside vg_create() and tying it to the vg handle, but was not certain this was the right approach. 2. Cleanup error paths. Integrate vg_read_error() with vg_create and vg_read* error codes and/or the new error APIs. Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2009-07-09 14:09:33 +04:00
log_error("A volume group called '%s' already exists.", vg_name);
unlock_and_release_vg(cmd, vg, vg_name);
return _vg_make_handle(cmd, NULL, FAILED_EXIST);
2001-10-12 18:25:53 +04:00
}
if (!(mem = dm_pool_create("lvm2 vg_create", VG_MEMPOOL_CHUNK)))
Change vg_create() to take only minimal parameters and obtain a lock. vg_t *vg_create(struct cmd_context *cmd, const char *vg_name); This is the first step towards the API called to create a VG. Call vg_lock_newname() inside this function. Use _vg_make_handle() where possible. Now we have 2 ways to construct a volume group: 1) vg_read: Used when constructing an existing VG from disks 2) vg_create: Used when constructing a new VG Both of these interfaces obtain a lock, and return a vg_t *. The usage of _vg_make_handle() inside vg_create() doesn't fit perfectly but it's ok for now. Needs some cleanup though and I've noted "FIXME" in the code. Add the new vg_create() plus vg 'set' functions for non-default VG parameters in the following tools: - vgcreate: Fairly straightforward refactoring. We just moved vg_lock_newname inside vg_create so we check the return via vg_read_error. - vgsplit: The refactoring here is a bit more tricky. Originally we called vg_lock_newname and depending on the error code, we either read the existing vg or created the new one. Now vg_create() calls vg_lock_newname, so we first try to create the VG. If this fails with FAILED_EXIST, we can then do the vg_read. If the create succeeds, we check the input parameters and set any new values on the VG. TODO in future patches: 1. The VG_ORPHAN lock needs some thought. We may want to treat this as any other VG, and require the application to obtain a handle and pass it to other API calls (for example, vg_extend). Or, we may find that hiding the VG_ORPHAN lock inside other APIs is the way to go. I thought of placing the VG_ORPHAN lock inside vg_create() and tying it to the vg handle, but was not certain this was the right approach. 2. Cleanup error paths. Integrate vg_read_error() with vg_create and vg_read* error codes and/or the new error APIs. Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2009-07-09 14:09:33 +04:00
goto_bad;
if (!(vg = dm_pool_zalloc(mem, sizeof(*vg))))
goto_bad;
2001-10-12 18:25:53 +04:00
if (!id_create(&vg->id)) {
log_error("Couldn't create uuid for volume group '%s'.",
vg_name);
2001-10-12 18:25:53 +04:00
goto bad;
}
2001-11-14 16:52:38 +03:00
/* Strip dev_dir if present */
vg_name = strip_dir(vg_name, cmd->dev_dir);
2001-10-15 22:39:40 +04:00
vg->vgmem = mem;
vg->cmd = cmd;
2002-01-07 18:27:55 +03:00
2008-01-30 16:19:47 +03:00
if (!(vg->name = dm_pool_strdup(mem, vg_name)))
goto_bad;
2001-10-12 18:25:53 +04:00
vg->seqno = 0;
vg->status = (RESIZEABLE_VG | LVM_READ | LVM_WRITE);
if (!(vg->system_id = dm_pool_alloc(mem, NAME_LEN)))
goto_bad;
*vg->system_id = '\0';
2001-10-12 18:25:53 +04:00
Change vg_create() to take only minimal parameters and obtain a lock. vg_t *vg_create(struct cmd_context *cmd, const char *vg_name); This is the first step towards the API called to create a VG. Call vg_lock_newname() inside this function. Use _vg_make_handle() where possible. Now we have 2 ways to construct a volume group: 1) vg_read: Used when constructing an existing VG from disks 2) vg_create: Used when constructing a new VG Both of these interfaces obtain a lock, and return a vg_t *. The usage of _vg_make_handle() inside vg_create() doesn't fit perfectly but it's ok for now. Needs some cleanup though and I've noted "FIXME" in the code. Add the new vg_create() plus vg 'set' functions for non-default VG parameters in the following tools: - vgcreate: Fairly straightforward refactoring. We just moved vg_lock_newname inside vg_create so we check the return via vg_read_error. - vgsplit: The refactoring here is a bit more tricky. Originally we called vg_lock_newname and depending on the error code, we either read the existing vg or created the new one. Now vg_create() calls vg_lock_newname, so we first try to create the VG. If this fails with FAILED_EXIST, we can then do the vg_read. If the create succeeds, we check the input parameters and set any new values on the VG. TODO in future patches: 1. The VG_ORPHAN lock needs some thought. We may want to treat this as any other VG, and require the application to obtain a handle and pass it to other API calls (for example, vg_extend). Or, we may find that hiding the VG_ORPHAN lock inside other APIs is the way to go. I thought of placing the VG_ORPHAN lock inside vg_create() and tying it to the vg handle, but was not certain this was the right approach. 2. Cleanup error paths. Integrate vg_read_error() with vg_create and vg_read* error codes and/or the new error APIs. Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2009-07-09 14:09:33 +04:00
vg->extent_size = DEFAULT_EXTENT_SIZE * 2;
2001-10-15 22:39:40 +04:00
vg->extent_count = 0;
vg->free_count = 0;
2001-10-12 18:25:53 +04:00
Change vg_create() to take only minimal parameters and obtain a lock. vg_t *vg_create(struct cmd_context *cmd, const char *vg_name); This is the first step towards the API called to create a VG. Call vg_lock_newname() inside this function. Use _vg_make_handle() where possible. Now we have 2 ways to construct a volume group: 1) vg_read: Used when constructing an existing VG from disks 2) vg_create: Used when constructing a new VG Both of these interfaces obtain a lock, and return a vg_t *. The usage of _vg_make_handle() inside vg_create() doesn't fit perfectly but it's ok for now. Needs some cleanup though and I've noted "FIXME" in the code. Add the new vg_create() plus vg 'set' functions for non-default VG parameters in the following tools: - vgcreate: Fairly straightforward refactoring. We just moved vg_lock_newname inside vg_create so we check the return via vg_read_error. - vgsplit: The refactoring here is a bit more tricky. Originally we called vg_lock_newname and depending on the error code, we either read the existing vg or created the new one. Now vg_create() calls vg_lock_newname, so we first try to create the VG. If this fails with FAILED_EXIST, we can then do the vg_read. If the create succeeds, we check the input parameters and set any new values on the VG. TODO in future patches: 1. The VG_ORPHAN lock needs some thought. We may want to treat this as any other VG, and require the application to obtain a handle and pass it to other API calls (for example, vg_extend). Or, we may find that hiding the VG_ORPHAN lock inside other APIs is the way to go. I thought of placing the VG_ORPHAN lock inside vg_create() and tying it to the vg handle, but was not certain this was the right approach. 2. Cleanup error paths. Integrate vg_read_error() with vg_create and vg_read* error codes and/or the new error APIs. Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2009-07-09 14:09:33 +04:00
vg->max_lv = DEFAULT_MAX_LV;
vg->max_pv = DEFAULT_MAX_PV;
2001-10-12 18:25:53 +04:00
Change vg_create() to take only minimal parameters and obtain a lock. vg_t *vg_create(struct cmd_context *cmd, const char *vg_name); This is the first step towards the API called to create a VG. Call vg_lock_newname() inside this function. Use _vg_make_handle() where possible. Now we have 2 ways to construct a volume group: 1) vg_read: Used when constructing an existing VG from disks 2) vg_create: Used when constructing a new VG Both of these interfaces obtain a lock, and return a vg_t *. The usage of _vg_make_handle() inside vg_create() doesn't fit perfectly but it's ok for now. Needs some cleanup though and I've noted "FIXME" in the code. Add the new vg_create() plus vg 'set' functions for non-default VG parameters in the following tools: - vgcreate: Fairly straightforward refactoring. We just moved vg_lock_newname inside vg_create so we check the return via vg_read_error. - vgsplit: The refactoring here is a bit more tricky. Originally we called vg_lock_newname and depending on the error code, we either read the existing vg or created the new one. Now vg_create() calls vg_lock_newname, so we first try to create the VG. If this fails with FAILED_EXIST, we can then do the vg_read. If the create succeeds, we check the input parameters and set any new values on the VG. TODO in future patches: 1. The VG_ORPHAN lock needs some thought. We may want to treat this as any other VG, and require the application to obtain a handle and pass it to other API calls (for example, vg_extend). Or, we may find that hiding the VG_ORPHAN lock inside other APIs is the way to go. I thought of placing the VG_ORPHAN lock inside vg_create() and tying it to the vg handle, but was not certain this was the right approach. 2. Cleanup error paths. Integrate vg_read_error() with vg_create and vg_read* error codes and/or the new error APIs. Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2009-07-09 14:09:33 +04:00
vg->alloc = DEFAULT_ALLOC_POLICY;
vg->mda_copies = DEFAULT_VGMETADATACOPIES;
2001-10-15 22:39:40 +04:00
vg->pv_count = 0;
dm_list_init(&vg->pvs);
2001-10-12 18:25:53 +04:00
dm_list_init(&vg->lvs);
2001-10-12 18:25:53 +04:00
dm_list_init(&vg->tags);
2004-03-08 20:19:15 +03:00
/* initialize removed_pvs list */
dm_list_init(&vg->removed_pvs);
if (!(vg->fid = cmd->fmt->ops->create_instance(cmd->fmt, vg_name,
NULL, NULL))) {
log_error("Failed to create format instance");
goto bad;
}
if (vg->fid->fmt->ops->vg_setup &&
!vg->fid->fmt->ops->vg_setup(vg->fid, vg)) {
2001-10-15 22:39:40 +04:00
log_error("Format specific setup of volume group '%s' failed.",
vg_name);
2001-10-12 18:25:53 +04:00
goto bad;
}
Change vg_create() to take only minimal parameters and obtain a lock. vg_t *vg_create(struct cmd_context *cmd, const char *vg_name); This is the first step towards the API called to create a VG. Call vg_lock_newname() inside this function. Use _vg_make_handle() where possible. Now we have 2 ways to construct a volume group: 1) vg_read: Used when constructing an existing VG from disks 2) vg_create: Used when constructing a new VG Both of these interfaces obtain a lock, and return a vg_t *. The usage of _vg_make_handle() inside vg_create() doesn't fit perfectly but it's ok for now. Needs some cleanup though and I've noted "FIXME" in the code. Add the new vg_create() plus vg 'set' functions for non-default VG parameters in the following tools: - vgcreate: Fairly straightforward refactoring. We just moved vg_lock_newname inside vg_create so we check the return via vg_read_error. - vgsplit: The refactoring here is a bit more tricky. Originally we called vg_lock_newname and depending on the error code, we either read the existing vg or created the new one. Now vg_create() calls vg_lock_newname, so we first try to create the VG. If this fails with FAILED_EXIST, we can then do the vg_read. If the create succeeds, we check the input parameters and set any new values on the VG. TODO in future patches: 1. The VG_ORPHAN lock needs some thought. We may want to treat this as any other VG, and require the application to obtain a handle and pass it to other API calls (for example, vg_extend). Or, we may find that hiding the VG_ORPHAN lock inside other APIs is the way to go. I thought of placing the VG_ORPHAN lock inside vg_create() and tying it to the vg handle, but was not certain this was the right approach. 2. Cleanup error paths. Integrate vg_read_error() with vg_create and vg_read* error codes and/or the new error APIs. Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2009-07-09 14:09:33 +04:00
return _vg_make_handle(cmd, vg, SUCCESS);
2001-10-12 18:25:53 +04:00
Change vg_create() to take only minimal parameters and obtain a lock. vg_t *vg_create(struct cmd_context *cmd, const char *vg_name); This is the first step towards the API called to create a VG. Call vg_lock_newname() inside this function. Use _vg_make_handle() where possible. Now we have 2 ways to construct a volume group: 1) vg_read: Used when constructing an existing VG from disks 2) vg_create: Used when constructing a new VG Both of these interfaces obtain a lock, and return a vg_t *. The usage of _vg_make_handle() inside vg_create() doesn't fit perfectly but it's ok for now. Needs some cleanup though and I've noted "FIXME" in the code. Add the new vg_create() plus vg 'set' functions for non-default VG parameters in the following tools: - vgcreate: Fairly straightforward refactoring. We just moved vg_lock_newname inside vg_create so we check the return via vg_read_error. - vgsplit: The refactoring here is a bit more tricky. Originally we called vg_lock_newname and depending on the error code, we either read the existing vg or created the new one. Now vg_create() calls vg_lock_newname, so we first try to create the VG. If this fails with FAILED_EXIST, we can then do the vg_read. If the create succeeds, we check the input parameters and set any new values on the VG. TODO in future patches: 1. The VG_ORPHAN lock needs some thought. We may want to treat this as any other VG, and require the application to obtain a handle and pass it to other API calls (for example, vg_extend). Or, we may find that hiding the VG_ORPHAN lock inside other APIs is the way to go. I thought of placing the VG_ORPHAN lock inside vg_create() and tying it to the vg handle, but was not certain this was the right approach. 2. Cleanup error paths. Integrate vg_read_error() with vg_create and vg_read* error codes and/or the new error APIs. Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2009-07-09 14:09:33 +04:00
bad:
unlock_and_release_vg(cmd, vg, vg_name);
/* FIXME: use _vg_make_handle() w/proper error code */
2001-10-12 18:25:53 +04:00
return NULL;
}
/*
 * Convert a size into a number of extents of extent_size, rounding the size
 * up to the next extent boundary (and announcing the rounded size) when it is
 * not already a multiple.
 *
 * Returns 0, after logging an error, if the rounded size would need more than
 * UINT32_MAX extents.
 */
uint64_t extents_from_size(struct cmd_context *cmd, uint64_t size,
			   uint32_t extent_size)
{
	const uint64_t remainder = size % extent_size;
	const uint64_t max_size = (uint64_t) UINT32_MAX * extent_size;

	if (remainder) {
		size += extent_size - remainder;
		log_print("Rounding up size to full physical extent %s",
			  display_size(cmd, size));
	}

	if (size > max_size) {
		log_error("Volume too large (%s) for extent size %s. "
			  "Upper limit is %s.",
			  display_size(cmd, size),
			  display_size(cmd, (uint64_t) extent_size),
			  display_size(cmd, max_size));
		return 0;
	}

	return size / extent_size;
}
/*
 * Return a random integer in the interval [0, max).
 *
 * A plain "rand % max" favours small results whenever the generator's range
 * is not a whole number of max-sized slices.  Each draw is therefore mapped
 * to the start of its slice (r - r % max), and draws whose slice starts above
 * RAND_MAX - max are thrown away and repeated.
 *
 * seed is the rand_r() state and is advanced by every draw.
 */
static unsigned _even_rand( unsigned *seed, unsigned max )
{
	unsigned draw, remainder;

	for (;;) {
		draw = (unsigned) rand_r(seed);
		remainder = draw % max;

		/* Accept only draws that do not come from the tail slice. */
		if (draw - remainder <= RAND_MAX - max)
			return remainder;
	}
}
/*
 * Allocate a bitset of num_bits bits from mem and set num_set_bits of them,
 * picking the bits with _even_rand() driven by the caller's seed.
 *
 * The numbers of the chosen bits are also collected as text in a pool object
 * so they can be reported through log_debug().
 *
 * Returns the bitset, or NULL after logging an error.  On every failure after
 * the bitset has been created it is handed back to the pool with
 * dm_pool_free().
 */
static dm_bitset_t _bitset_with_random_bits(struct dm_pool *mem, uint32_t num_bits,
					    uint32_t num_set_bits, unsigned *seed)
{
	dm_bitset_t bs;
	unsigned bit_selected;
	char buf[32];
	uint32_t i = num_bits - num_set_bits;

	if (!(bs = dm_bitset_create(mem, (unsigned) num_bits))) {
		log_error("Failed to allocate bitset for setting random bits.");
		return NULL;
	}

	if (!dm_pool_begin_object(mem, 512)) {
		log_error("dm_pool_begin_object failed for random list of bits.");
		dm_pool_free(mem, bs);
		return NULL;
	}

	/* Perform loop num_set_bits times, selecting one bit each time */
	while (i++ < num_bits) {
		/* Select a random bit between 0 and (i-1) inclusive. */
		bit_selected = _even_rand(seed, i);

		/*
		 * If the bit was already set, set the new bit that became
		 * choosable for the first time during this pass.
		 * This maintains a uniform probability distribution by compensating
		 * for being unable to select it until this pass.
		 */
		if (dm_bit(bs, bit_selected))
			bit_selected = i - 1;

		dm_bit_set(bs, bit_selected);

		if (dm_snprintf(buf, sizeof(buf), "%u ", bit_selected) < 0) {
			log_error("snprintf random bit failed.");
			dm_pool_free(mem, bs);
			return NULL;
		}
		if (!dm_pool_grow_object(mem, buf, strlen(buf))) {
			log_error("Failed to generate list of random bits.");
			dm_pool_free(mem, bs);
			return NULL;
		}
	}

	/*
	 * Only the characters of each number were appended above, so the
	 * text has to be NUL-terminated before it is printed with %s.
	 */
	if (!dm_pool_grow_object(mem, "\0", 1)) {
		log_error("Failed to finish list of random bits.");
		dm_pool_free(mem, bs);
		return NULL;
	}

	log_debug("Selected %" PRIu32 " random bits from %" PRIu32 ": %s", num_set_bits, num_bits, (char *) dm_pool_end_object(mem));

	return bs;
}
static int _vg_ignore_mdas(struct volume_group *vg, uint32_t num_to_ignore)
{
struct metadata_area *mda;
uint32_t mda_used_count = vg_mda_used_count(vg);
dm_bitset_t mda_to_ignore_bs;
int r = 1;
2010-07-06 21:27:32 +04:00
log_debug("Adjusting ignored mdas for %s: %" PRIu32 " of %" PRIu32 " mdas in use "
2010-07-06 21:29:50 +04:00
"but %" PRIu32 " required. Changing %" PRIu32 " mda.",
vg->name, mda_used_count, vg_mda_count(vg), vg_mda_copies(vg), num_to_ignore);
2010-06-30 17:51:11 +04:00
if (!num_to_ignore)
return 1;
2010-06-30 17:51:11 +04:00
if (!(mda_to_ignore_bs = _bitset_with_random_bits(vg->vgmem, mda_used_count,
num_to_ignore, &vg->cmd->rand_seed)))
return_0;
2010-06-30 23:28:35 +04:00
dm_list_iterate_items(mda, &vg->fid->metadata_areas_in_use)
if (!mda_is_ignored(mda) && (--mda_used_count,
dm_bit(mda_to_ignore_bs, mda_used_count))) {
mda_set_ignored(mda, 1);
2010-06-30 23:28:35 +04:00
if (!--num_to_ignore)
goto out;
}
2010-06-30 17:51:11 +04:00
log_error(INTERNAL_ERROR "Unable to find %"PRIu32" metadata areas to ignore "
"on volume group %s", num_to_ignore, vg->name);
2010-06-30 17:51:11 +04:00
r = 0;
out:
dm_pool_free(vg->vgmem, mda_to_ignore_bs);
return r;
}
static int _vg_unignore_mdas(struct volume_group *vg, uint32_t num_to_unignore)
{
struct metadata_area *mda, *tmda;
uint32_t mda_used_count = vg_mda_used_count(vg);
uint32_t mda_count = vg_mda_count(vg);
uint32_t mda_free_count = mda_count - mda_used_count;
dm_bitset_t mda_to_unignore_bs;
int r = 1;
if (!num_to_unignore)
return 1;
2010-06-30 17:51:11 +04:00
2010-07-06 21:27:32 +04:00
log_debug("Adjusting ignored mdas for %s: %" PRIu32 " of %" PRIu32 " mdas in use "
2010-07-06 21:29:50 +04:00
"but %" PRIu32 " required. Changing %" PRIu32 " mda.",
vg->name, mda_used_count, mda_count, vg_mda_copies(vg), num_to_unignore);
if (!(mda_to_unignore_bs = _bitset_with_random_bits(vg->vgmem, mda_free_count,
num_to_unignore, &vg->cmd->rand_seed)))
return_0;
2010-06-30 17:51:11 +04:00
2010-06-30 23:28:35 +04:00
dm_list_iterate_items_safe(mda, tmda, &vg->fid->metadata_areas_ignored)
if (mda_is_ignored(mda) && (--mda_free_count,
dm_bit(mda_to_unignore_bs, mda_free_count))) {
mda_set_ignored(mda, 0);
dm_list_move(&vg->fid->metadata_areas_in_use,
&mda->list);
2010-06-30 23:28:35 +04:00
if (!--num_to_unignore)
goto out;
}
2010-06-30 17:51:11 +04:00
2010-06-30 23:28:35 +04:00
dm_list_iterate_items(mda, &vg->fid->metadata_areas_in_use)
if (mda_is_ignored(mda) && (--mda_free_count,
dm_bit(mda_to_unignore_bs, mda_free_count))) {
mda_set_ignored(mda, 0);
2010-06-30 23:28:35 +04:00
if (!--num_to_unignore)
goto out;
}
2010-06-30 17:51:11 +04:00
log_error(INTERNAL_ERROR "Unable to find %"PRIu32" metadata areas to unignore "
"on volume group %s", num_to_unignore, vg->name);
r = 0;
out:
dm_pool_free(vg->vgmem, mda_to_unignore_bs);
return r;
}
static int _vg_adjust_ignored_mdas(struct volume_group *vg)
{
2010-06-30 23:28:35 +04:00
uint32_t mda_copies_used = vg_mda_used_count(vg);
2010-06-30 23:28:35 +04:00
if (vg->mda_copies == VGMETADATACOPIES_UNMANAGED) {
/* Ensure at least one mda is in use. */
if (!mda_copies_used && vg_mda_count(vg) && !_vg_unignore_mdas(vg, 1))
return_0;
else
2010-06-30 23:28:35 +04:00
return 1;
}
2010-06-30 17:51:11 +04:00
2010-06-30 23:28:35 +04:00
/* Not an error to have vg_mda_count larger than total mdas. */
if (vg->mda_copies == VGMETADATACOPIES_ALL ||
vg->mda_copies >= vg_mda_count(vg)) {
/* Use all */
if (!_vg_unignore_mdas(vg, vg_mda_count(vg) - mda_copies_used))
return_0;
} else if (mda_copies_used < vg->mda_copies) {
if (!_vg_unignore_mdas(vg, vg->mda_copies - mda_copies_used))
return_0;
} else if (mda_copies_used > vg->mda_copies)
if (!_vg_ignore_mdas(vg, mda_copies_used - vg->mda_copies))
return_0;
/*
* The VGMETADATACOPIES_ALL value will never be written disk.
* It is a special cmdline value that means 2 things:
* 1. clear all ignore bits in all mdas in this vg
* 2. set the "unmanaged" policy going forward for metadata balancing
*/
if (vg->mda_copies == VGMETADATACOPIES_ALL)
vg->mda_copies = VGMETADATACOPIES_UNMANAGED;
2010-06-30 17:51:11 +04:00
return 1;
}
/*
 * Return the smallest value reported by the mda_total_sectors operation
 * across the metadata areas on the list.  Areas whose format does not provide
 * that operation are skipped; if no area yields a value below UINT64_MAX the
 * result is 0.
 */
uint64_t find_min_mda_size(struct dm_list *mdas)
{
	struct metadata_area *mda;
	uint64_t smallest = UINT64_MAX;
	uint64_t sectors;

	dm_list_iterate_items(mda, mdas) {
		if (mda->ops->mda_total_sectors) {
			sectors = mda->ops->mda_total_sectors(mda);
			if (sectors < smallest)
				smallest = sectors;
		}
	}

	/* UINT64_MAX doubles as the "nothing found" marker. */
	return (smallest == UINT64_MAX) ? UINT64_C(0) : smallest;
}
/*
 * Walk mdas_from and take off it every metadata area for which the format's
 * mda_in_vg operation returns zero when asked about vg_from.  Such an area is
 * moved to mdas_to, or simply unlinked when vg_to is an orphan VG.
 *
 * Areas whose format has no mda_in_vg operation are left where they are.
 *
 * Returns 1 if at least one such area (without mda_in_vg) was seen, else 0.
 */
static int _move_mdas(struct volume_group *vg_from, struct volume_group *vg_to,
		      struct dm_list *mdas_from, struct dm_list *mdas_to)
{
	struct metadata_area *mda, *next;
	int seen_common = 0;

	dm_list_iterate_items_safe(mda, next, mdas_from) {
		if (!mda->ops->mda_in_vg)
			seen_common = 1;
		else if (!mda->ops->mda_in_vg(vg_from->fid, vg_from, mda)) {
			if (is_orphan_vg(vg_to->name))
				dm_list_del(&mda->list);
			else
				dm_list_move(mdas_to, &mda->list);
		}
	}

	return seen_common;
}
/*
 * Separate metadata areas after splitting a VG.
 * Also accepts orphan VG as destination (for vgreduce).
 *
 * Both the in-use and the ignored metadata area lists of vg_from are
 * processed with _move_mdas().
 *
 * Returns 1 when both VGs end up with at least one metadata area (an orphan
 * destination is allowed to have none).  Otherwise the result is 1 only if
 * _move_mdas() reported a common metadata area on either list, else 0.
 *
 * cmd is unused.
 */
int vg_split_mdas(struct cmd_context *cmd __attribute__((unused)),
		  struct volume_group *vg_from, struct volume_group *vg_to)
{
	struct dm_list *mdas_from_in_use, *mdas_to_in_use;
	struct dm_list *mdas_from_ignored, *mdas_to_ignored;
	int common_mda = 0;

	mdas_from_in_use = &vg_from->fid->metadata_areas_in_use;
	mdas_from_ignored = &vg_from->fid->metadata_areas_ignored;
	mdas_to_in_use = &vg_to->fid->metadata_areas_in_use;
	mdas_to_ignored = &vg_to->fid->metadata_areas_ignored;

	/*
	 * Accumulate the results: a common mda found on the in-use list must
	 * not be forgotten just because the ignored list has none.
	 */
	common_mda |= _move_mdas(vg_from, vg_to,
				 mdas_from_in_use, mdas_to_in_use);
	common_mda |= _move_mdas(vg_from, vg_to,
				 mdas_from_ignored, mdas_to_ignored);

	if ((dm_list_empty(mdas_from_in_use) &&
	     dm_list_empty(mdas_from_ignored)) ||
	    ((!is_orphan_vg(vg_to->name) &&
	      dm_list_empty(mdas_to_in_use) &&
	      dm_list_empty(mdas_to_ignored))))
		return common_mda;

	return 1;
}
/*
 * Run the detector func on dev and, if it reports a signature, overwrite it.
 *
 * func returns -1 for a fatal error, 0 when nothing was detected and any
 * other value when a signature was detected; the value it stores through its
 * second argument is passed on to dev_set() together with wipe_len.
 *
 * The user is asked for confirmation unless pp->yes is set or pp->force is
 * something other than PROMPT.  type and name are only used in messages.
 *
 * Returns 1 if nothing was detected or the wipe succeeded, 0 otherwise.
 */
static int _wipe_sb(struct device *dev, const char *type, const char *name,
		    int wipe_len, struct pvcreate_params *pp,
		    int (*func)(struct device *dev, uint64_t *signature))
{
	uint64_t sig_offset;
	int must_ask;

	switch (func(dev, &sig_offset)) {
	case -1:
		log_error("Fatal error while trying to detect %s on %s.",
			  type, name);
		return 0;
	case 0:
		return 1;
	default:
		break;
	}

	/* Specifying --yes => do not ask. */
	must_ask = !pp->yes && (pp->force == PROMPT);
	if (must_ask &&
	    yes_no_prompt("WARNING: %s detected on %s. Wipe it? [y/n] ",
			  type, name) != 'y') {
		log_error("Aborting pvcreate on %s.", name);
		return 0;
	}

	log_print("Wiping %s on %s.", type, name);

	if (dev_set(dev, sig_offset, wipe_len, 0))
		return 1;

	log_error("Failed to wipe %s on %s.", type, name);
	return 0;
}
/*
 * Decide whether the device called name may be initialised by pvcreate.
 *
 * Returns 1 if it may and 0 if it may not (a PV of an existing VG without the
 * required force level, a refusal at a prompt, an interrupt, a device that
 * cannot be found or opened exclusively, or a signature that was not wiped).
 */
static int pvcreate_check(struct cmd_context *cmd, const char *name,
			  struct pvcreate_params *pp)
{
	struct physical_volume *pv;
	struct device *dev;
	struct dm_list mdas;

	dm_list_init(&mdas);

	/* FIXME Check partition type is LVM unless --force is given */

	/* Is there a pv here already? */
	pv = pv_read(cmd, name, &mdas, NULL, 0, 0);

	/*
	 * If a PV has no MDAs it may appear to be an orphan until the
	 * metadata is read off another PV in the same VG.  Detecting
	 * this means checking every VG by scanning every PV on the
	 * system.
	 */
	if (pv && is_orphan(pv) && mdas_empty_or_ignored(&mdas)) {
		if (!scan_vgs_for_pvs(cmd))
			return_0;
		pv = pv_read(cmd, name, NULL, NULL, 0, 0);
	}

	if (pv && !is_orphan(pv)) {
		/* Allow partial & exported VGs to be destroyed. */
		/* We must have -ff to overwrite a non orphan */
		if (pp->force != DONT_PROMPT_OVERRIDE) {
			log_error("Can't initialize physical volume \"%s\" of "
				  "volume group \"%s\" without -ff", name, pv_vg_name(pv));
			return 0;
		}

		/* prompt */
		if (!pp->yes &&
		    yes_no_prompt(_really_init, name, pv_vg_name(pv)) == 'n') {
			log_error("%s: physical volume not initialized", name);
			return 0;
		}
	}

	if (sigint_caught())
		return 0;

	dev = dev_cache_get(name, cmd->filter);

	/* Is there an md superblock here? */
	/* FIXME: still possible issues here - rescan cache? */
	if (!dev && md_filtering()) {
		/* Retry the lookup once with md filtering switched off. */
		refresh_filters(cmd);
		init_md_filtering(0);
		dev = dev_cache_get(name, cmd->filter);
		init_md_filtering(1);
	}

	if (!dev) {
		log_error("Device %s not found (or ignored by filtering).", name);
		return 0;
	}

	/*
	 * This test will fail if the device belongs to an MD array.
	 */
	if (!dev_test_excl(dev)) {
		/* FIXME Detect whether device-mapper itself is still using it */
		log_error("Can't open %s exclusively.  Mounted filesystem?",
			  name);
		return 0;
	}

	if (!_wipe_sb(dev, "software RAID md superblock", name, 4, pp, dev_is_md) ||
	    !_wipe_sb(dev, "swap signature", name, 10, pp, dev_is_swap) ||
	    !_wipe_sb(dev, "LUKS signature", name, 8, pp, dev_is_luks))
		return 0;

	if (sigint_caught())
		return 0;

	/* Only PVs that belong to a VG get here with force set, so name the VG. */
	if (pv && !is_orphan(pv) && pp->force)
		log_warn("WARNING: Forcing physical volume creation on "
			 "%s%s%s%s", name,
			 " of volume group \"",
			 pv_vg_name(pv),
			 "\"");

	return 1;
}
/*
 * Fill *pp with the built-in pvcreate defaults.
 *
 * The structure is cleared first, so any member that is not explicitly
 * assigned below starts out as zero.
 */
void pvcreate_params_set_defaults(struct pvcreate_params *pp)
{
	memset(pp, 0, sizeof(*pp));

	/* User interaction */
	pp->force = PROMPT;
	pp->yes = 0;

	/* Label and device wiping */
	pp->zero = 1;
	pp->labelsector = DEFAULT_LABELSECTOR;
	pp->idp = 0;
	pp->restorefile = 0;

	/* Geometry */
	pp->size = 0;
	pp->data_alignment = UINT64_C(0);
	pp->data_alignment_offset = UINT64_C(0);
	pp->pe_start = 0;
	pp->extent_count = 0;
	pp->extent_size = 0;

	/* Metadata areas */
	pp->pvmetadatacopies = DEFAULT_PVMETADATACOPIES;
	pp->pvmetadatasize = DEFAULT_PVMETADATASIZE;
	pp->metadataignore = DEFAULT_PVMETADATAIGNORE;
}
/*
2009-10-06 20:00:38 +04:00
* pvcreate_single() - initialize a device with PV label and metadata area
*
* Parameters:
* - pv_name: device path to initialize
2009-10-06 20:00:38 +04:00
* - pp: parameters to pass to pv_create; if NULL, use default values
*
* Returns:
* NULL: error
* struct physical_volume * (non-NULL): handle to physical volume created
*/
2009-10-06 20:00:38 +04:00
struct physical_volume * pvcreate_single(struct cmd_context *cmd,
const char *pv_name,
struct pvcreate_params *pp)
{
struct physical_volume *pv;
struct device *dev;
struct dm_list mdas;
struct pvcreate_params default_pp;
char buffer[64] __attribute__((aligned(8)));
pvcreate_params_set_defaults(&default_pp);
if (!pp)
pp = &default_pp;
if (pp->idp) {
if ((dev = device_from_pvid(cmd, pp->idp, NULL)) &&
(dev != dev_cache_get(pv_name, cmd->filter))) {
if (!id_write_format((const struct id*)&pp->idp->uuid,
buffer, sizeof(buffer)))
return_NULL;
log_error("uuid %s already in use on \"%s\"", buffer,
dev_name(dev));
return NULL;
}
}
if (!pvcreate_check(cmd, pv_name, pp))
goto error;
if (sigint_caught())
goto error;
if (!(dev = dev_cache_get(pv_name, cmd->filter))) {
log_error("%s: Couldn't find device. Check your filters?",
pv_name);
goto error;
}
dm_list_init(&mdas);
if (!(pv = pv_create(cmd, dev, pp->idp, pp->size,
pp->data_alignment, pp->data_alignment_offset,
pp->pe_start, pp->extent_count, pp->extent_size,
pp->pvmetadatacopies, pp->pvmetadatasize,
pp->metadataignore, &mdas))) {
log_error("Failed to setup physical volume \"%s\"", pv_name);
goto error;
}
log_verbose("Set up physical volume for \"%s\" with %" PRIu64
" available sectors", pv_name, pv_size(pv));
/* Wipe existing label first */
if (!label_remove(pv_dev(pv))) {
log_error("Failed to wipe existing label on %s", pv_name);
goto error;
}
if (pp->zero) {
log_verbose("Zeroing start of device %s", pv_name);
if (!dev_open_quiet(dev)) {
log_error("%s not opened: device not zeroed", pv_name);
goto error;
}
if (!dev_set(dev, UINT64_C(0), (size_t) 2048, 0)) {
log_error("%s not wiped: aborting", pv_name);
dev_close(dev);
goto error;
}
dev_close(dev);
}
log_very_verbose("Writing physical volume data to disk \"%s\"",
pv_name);
if (!(pv_write(cmd, pv, &mdas, pp->labelsector))) {
log_error("Failed to write physical volume \"%s\"", pv_name);
goto error;
}
log_print("Physical volume \"%s\" successfully created", pv_name);
return pv;
error:
return NULL;
}
/* Hand pv back to the pool mem by calling dm_pool_free() on it. */
static void _free_pv(struct dm_pool *mem, struct physical_volume *pv)
{
dm_pool_free(mem, pv);
}
static struct physical_volume *_alloc_pv(struct dm_pool *mem, struct device *dev)
{
struct physical_volume *pv = dm_pool_zalloc(mem, sizeof(*pv));
2008-01-30 16:19:47 +03:00
if (!pv)
return_NULL;
pv->pe_size = 0;
pv->pe_start = 0;
pv->pe_count = 0;
pv->pe_alloc_count = 0;
pv->pe_align = 0;
pv->pe_align_offset = 0;
pv->fmt = NULL;
pv->dev = dev;
pv->status = ALLOCATABLE_PV;
dm_list_init(&pv->tags);
dm_list_init(&pv->segments);
return pv;
}
/**
* pv_create - initialize a physical volume for use with a volume group
*
* @fmt: format type
* @dev: PV device to initialize
* @size: size of the PV in sectors
* @data_alignment: requested alignment of data
* @data_alignment_offset: requested offset to aligned data
* @pe_start: physical extent start
* @existing_extent_count
* @existing_extent_size
* @pvmetadatacopies
* @pvmetadatasize
* @mdas
*
* Returns:
* PV handle - physical volume initialized successfully
* NULL - invalid parameter or problem initializing the physical volume
*
* Note:
* FIXME: shorten argument list and replace with explict 'set' functions
*/
struct physical_volume *pv_create(const struct cmd_context *cmd,
struct device *dev,
struct id *id, uint64_t size,
unsigned long data_alignment,
unsigned long data_alignment_offset,
uint64_t pe_start,
uint32_t existing_extent_count,
uint32_t existing_extent_size,
int pvmetadatacopies, uint64_t pvmetadatasize,
unsigned metadataignore, struct dm_list *mdas)
2001-09-25 16:49:28 +04:00
{
const struct format_type *fmt = cmd->fmt;
struct dm_pool *mem = fmt->cmd->mem;
struct physical_volume *pv = _alloc_pv(mem, dev);
if (!pv)
return NULL;
if (id)
memcpy(&pv->id, id, sizeof(*id));
else if (!id_create(&pv->id)) {
log_error("Failed to create random uuid for %s.",
dev_name(dev));
goto bad;
}
if (!dev_get_size(pv->dev, &pv->size)) {
log_error("%s: Couldn't get size.", pv_dev_name(pv));
goto bad;
}
if (size) {
if (size > pv->size)
log_warn("WARNING: %s: Overriding real size. "
"You could lose data.", pv_dev_name(pv));
log_verbose("%s: Pretending size is %" PRIu64 " sectors.",
pv_dev_name(pv), size);
pv->size = size;
}
if (pv->size < PV_MIN_SIZE) {
2002-11-18 17:04:08 +03:00
log_error("%s: Size must exceed minimum of %ld sectors.",
pv_dev_name(pv), PV_MIN_SIZE);
goto bad;
}
if (pv->size < data_alignment) {
log_error("%s: Data alignment must not exceed device size.",
pv_dev_name(pv));
goto bad;
}
2002-11-18 17:04:08 +03:00
pv->fmt = fmt;
2008-02-06 18:47:28 +03:00
pv->vg_name = fmt->orphan_vg_name;
2002-02-15 17:33:59 +03:00
2002-11-18 17:04:08 +03:00
if (!fmt->ops->pv_setup(fmt, pe_start, existing_extent_count,
existing_extent_size, data_alignment,
data_alignment_offset,
pvmetadatacopies, pvmetadatasize,
metadataignore, mdas, pv, NULL)) {
log_error("%s: Format-specific setup of physical volume "
"failed.", pv_dev_name(pv));
2002-02-15 17:33:59 +03:00
goto bad;
}
return pv;
2001-10-15 22:39:40 +04:00
bad:
_free_pv(mem, pv);
return NULL;
2001-09-25 16:49:28 +04:00
}
/* FIXME: liblvm todo - make into function that returns handle */
2008-03-14 01:51:24 +03:00
/*
 * Exported entry point: passes vg and pv_name straight to the static
 * _find_pv_in_vg() and returns its result unchanged.
 */
struct pv_list *find_pv_in_vg(const struct volume_group *vg,
const char *pv_name)
{
return _find_pv_in_vg(vg, pv_name);
}
2008-03-14 01:51:24 +03:00
static struct pv_list *_find_pv_in_vg(const struct volume_group *vg,
const char *pv_name)
2001-10-15 22:39:40 +04:00
{
struct pv_list *pvl;
dm_list_iterate_items(pvl, &vg->pvs)
if (pvl->pv->dev == dev_cache_get(pv_name, vg->cmd->filter))
return pvl;
2001-09-25 16:49:28 +04:00
2001-10-15 22:39:40 +04:00
return NULL;
2002-11-18 17:04:08 +03:00
}
struct pv_list *find_pv_in_pv_list(const struct dm_list *pl,
const struct physical_volume *pv)
{
struct pv_list *pvl;
dm_list_iterate_items(pvl, pl)
if (pvl->pv == pv)
return pvl;
2008-04-10 23:59:43 +04:00
return NULL;
}
int pv_is_in_vg(struct volume_group *vg, struct physical_volume *pv)
{
2005-06-01 20:51:55 +04:00
struct pv_list *pvl;
dm_list_iterate_items(pvl, &vg->pvs)
2005-06-01 20:51:55 +04:00
if (pv == pvl->pv)
return 1;
return 0;
}
static struct pv_list *_find_pv_in_vg_by_uuid(const struct volume_group *vg,
const struct id *id)
{
struct pv_list *pvl;
dm_list_iterate_items(pvl, &vg->pvs)
if (id_equal(&pvl->pv->id, id))
return pvl;
return NULL;
}
/**
 * find_pv_in_vg_by_uuid - Find PV in VG by PV UUID
 * @vg: volume group to search
 * @id: UUID of the PV to match
 *
 * Exported wrapper that forwards both arguments to the static
 * _find_pv_in_vg_by_uuid() and returns its result unchanged.
 *
 * Returns:
 *   struct pv_list within owning struct volume_group - if UUID of PV found in VG
 *   NULL - UUID of PV not found in VG
 *
 * Note
 *   FIXME - liblvm todo - make into function that takes VG handle
 */
struct pv_list *find_pv_in_vg_by_uuid(const struct volume_group *vg,
const struct id *id)
{
return _find_pv_in_vg_by_uuid(vg, id);
}
2008-03-14 01:51:24 +03:00
struct lv_list *find_lv_in_vg(const struct volume_group *vg,
const char *lv_name)
2001-10-29 16:52:23 +03:00
{
struct lv_list *lvl;
2001-10-29 16:52:23 +03:00
const char *ptr;
/* Use last component */
if ((ptr = strrchr(lv_name, '/')))
ptr++;
else
ptr = lv_name;
2001-10-31 15:47:01 +03:00
dm_list_iterate_items(lvl, &vg->lvs)
2002-01-21 19:49:32 +03:00
if (!strcmp(lvl->lv->name, ptr))
return lvl;
2001-10-29 16:52:23 +03:00
return NULL;
2001-10-29 16:52:23 +03:00
}
struct lv_list *find_lv_in_lv_list(const struct dm_list *ll,
const struct logical_volume *lv)
{
struct lv_list *lvl;
dm_list_iterate_items(lvl, ll)
if (lvl->lv == lv)
return lvl;
2008-04-10 23:59:43 +04:00
return NULL;
}
struct lv_list *find_lv_in_vg_by_lvid(struct volume_group *vg,
const union lvid *lvid)
{
struct lv_list *lvl;
dm_list_iterate_items(lvl, &vg->lvs)
if (!strncmp(lvl->lv->lvid.s, lvid->s, sizeof(*lvid)))
return lvl;
return NULL;
}
2008-03-14 01:51:24 +03:00
struct logical_volume *find_lv(const struct volume_group *vg,
const char *lv_name)
2001-10-29 16:52:23 +03:00
{
struct lv_list *lvl = find_lv_in_vg(vg, lv_name);
2002-01-21 19:49:32 +03:00
return lvl ? lvl->lv : NULL;
2001-10-29 16:52:23 +03:00
}
struct physical_volume *find_pv(struct volume_group *vg, struct device *dev)
2001-10-29 16:52:23 +03:00
{
2005-06-01 20:51:55 +04:00
struct pv_list *pvl;
dm_list_iterate_items(pvl, &vg->pvs)
2005-06-01 20:51:55 +04:00
if (dev == pvl->pv->dev)
return pvl->pv;
return NULL;
2001-10-29 16:52:23 +03:00
}
/* FIXME: liblvm todo - make into function that returns handle */
2004-05-05 15:04:28 +04:00
/*
 * Exported entry point: passes cmd and pv_name straight to the static
 * _find_pv_by_name() and returns its result unchanged.
 */
struct physical_volume *find_pv_by_name(struct cmd_context *cmd,
const char *pv_name)
{
return _find_pv_by_name(cmd, pv_name);
}
static struct physical_volume *_find_pv_by_name(struct cmd_context *cmd,
const char *pv_name)
2004-05-05 15:04:28 +04:00
{
struct dm_list mdas;
2004-05-05 15:04:28 +04:00
struct physical_volume *pv;
dm_list_init(&mdas);
if (!(pv = _pv_read(cmd, cmd->mem, pv_name, &mdas, NULL, 1, 0))) {
2004-05-05 15:04:28 +04:00
log_error("Physical volume %s not found", pv_name);
return NULL;
}
if (is_orphan_vg(pv->vg_name) && mdas_empty_or_ignored(&mdas)) {
/* If a PV has no MDAs - need to search all VGs for it */
if (!scan_vgs_for_pvs(cmd))
return_NULL;
if (!(pv = _pv_read(cmd, cmd->mem, pv_name, NULL, NULL, 1, 0))) {
log_error("Physical volume %s not found", pv_name);
return NULL;
}
}
if (is_orphan_vg(pv->vg_name)) {
2004-05-05 15:04:28 +04:00
log_error("Physical volume %s not in a volume group", pv_name);
return NULL;
}
return pv;
}
/* Find segment at a given logical extent in an LV */
2007-12-20 21:55:46 +03:00
struct lv_segment *find_seg_by_le(const struct logical_volume *lv, uint32_t le)
{
struct lv_segment *seg;
dm_list_iterate_items(seg, &lv->segments)
if (le >= seg->le && le < seg->le + seg->len)
return seg;
return NULL;
}
2007-12-20 21:55:46 +03:00
struct lv_segment *first_seg(const struct logical_volume *lv)
2005-10-28 16:48:50 +04:00
{
struct lv_segment *seg;
2005-10-28 16:48:50 +04:00
dm_list_iterate_items(seg, &lv->segments)
return seg;
2005-10-28 16:48:50 +04:00
return NULL;
2005-10-28 16:48:50 +04:00
}
/*
 * Call the vg_remove operation of every in-use metadata area of vg.
 *
 * Areas without a vg_remove operation are skipped.  Stops at the first
 * failure and returns 0; returns 1 if every call succeeded.
 */
int vg_remove_mdas(struct volume_group *vg)
{
	struct metadata_area *mda;

	/* FIXME Improve recovery situation? */
	/* Remove each copy of the metadata */
	dm_list_iterate_items(mda, &vg->fid->metadata_areas_in_use) {
		if (!mda->ops->vg_remove)
			continue;
		if (!mda->ops->vg_remove(vg->fid, vg, mda))
			return_0;
	}

	return 1;
}
/*
 * Determine whether two vgs are compatible for merging.
 *
 * Checks, in order: vg_from has no active LVs, extent sizes match, the
 * PV and LV limits of vg_to would not be exceeded by the combined VG,
 * metadata format and clustered attribute match, no LV name appears in
 * both VGs, and no PV of either VG is built on top of the other VG.
 *
 * Returns 1 if all checks pass.  On the first failed check an error is
 * logged and 0 is returned.
 */
int vgs_are_compatible(struct cmd_context *cmd __attribute__((unused)),
		       struct volume_group *vg_from,
		       struct volume_group *vg_to)
{
	struct lv_list *lvl1, *lvl2;
	struct pv_list *pvl;
	const char *name1, *name2;

	if (lvs_in_vg_activated(vg_from)) {
		log_error("Logical volumes in \"%s\" must be inactive",
			  vg_from->name);
		return 0;
	}

	/* Check compatibility */
	if (vg_to->extent_size != vg_from->extent_size) {
		log_error("Extent sizes differ: %" PRIu32 " (%s) and %"
			  PRIu32 " (%s)",
			  vg_to->extent_size, vg_to->name,
			  vg_from->extent_size, vg_from->name);
		return 0;
	}

	/* A limit of 0 means "no limit". */
	if (vg_to->max_pv &&
	    (vg_to->max_pv < vg_to->pv_count + vg_from->pv_count)) {
		log_error("Maximum number of physical volumes (%" PRIu32
			  ") exceeded for \"%s\" and \"%s\"",
			  vg_to->max_pv, vg_to->name, vg_from->name);
		return 0;
	}

	if (vg_to->max_lv &&
	    (vg_to->max_lv < vg_visible_lvs(vg_to) + vg_visible_lvs(vg_from))) {
		log_error("Maximum number of logical volumes (%" PRIu32
			  ") exceeded for \"%s\" and \"%s\"",
			  vg_to->max_lv, vg_to->name, vg_from->name);
		return 0;
	}

	/* Metadata types must be the same */
	if (vg_to->fid->fmt != vg_from->fid->fmt) {
		log_error("Metadata types differ for \"%s\" and \"%s\"",
			  vg_to->name, vg_from->name);
		return 0;
	}

	/* Clustering attribute must be the same */
	if (vg_is_clustered(vg_to) != vg_is_clustered(vg_from)) {
		log_error("Clustered attribute differs for \"%s\" and \"%s\"",
			  vg_to->name, vg_from->name);
		return 0;
	}

	/* Check no conflicts with LV names */
	dm_list_iterate_items(lvl1, &vg_to->lvs) {
		name1 = lvl1->lv->name;

		dm_list_iterate_items(lvl2, &vg_from->lvs) {
			name2 = lvl2->lv->name;

			if (!strcmp(name1, name2)) {
				log_error("Duplicate logical volume "
					  "name \"%s\" "
					  "in \"%s\" and \"%s\"",
					  name1, vg_to->name, vg_from->name);
				return 0;
			}
		}
	}

	/* Check no PVs are constructed from either VG */
	dm_list_iterate_items(pvl, &vg_to->pvs) {
		if (pv_uses_vg(pvl->pv, vg_from)) {
			log_error("Physical volume %s might be constructed "
				  "from same volume group %s.",
				  pv_dev_name(pvl->pv), vg_from->name);
			return 0;
		}
	}

	dm_list_iterate_items(pvl, &vg_from->pvs) {
		if (pv_uses_vg(pvl->pv, vg_to)) {
			log_error("Physical volume %s might be constructed "
				  "from same volume group %s.",
				  pv_dev_name(pvl->pv), vg_to->name);
			return 0;
		}
	}

	return 1;
}
/*
 * Bundles the caller's callback and its argument so that
 * _lv_postorder_visit() can hand both to _lv_postorder_level() through
 * the single void *data parameter of _lv_each_dependency().
 */
struct _lv_postorder_baton {
int (*fn)(struct logical_volume *lv, void *data); /* callback to run on each LV */
void *data; /* argument passed through to fn */
};
static int _lv_postorder_visit(struct logical_volume *,
			       int (*fn)(struct logical_volume *lv, void *data),
			       void *data);

/*
 * Per-dependency step of the postorder walk; data is a
 * struct _lv_postorder_baton holding the real callback and its argument.
 *
 * POSTORDER_OPEN_FLAG marks an LV whose dependencies are currently being
 * walked: meeting it again means the dependency graph has a cycle, and
 * the LV is skipped.  After the visit the LV is marked with
 * POSTORDER_FLAG whatever the result.
 *
 * Returns the result of _lv_postorder_visit(), or 1 for a skipped LV.
 */
static int _lv_postorder_level(struct logical_volume *lv, void *data)
{
	struct _lv_postorder_baton *baton = data;
	int r;

	if (lv->status & POSTORDER_OPEN_FLAG)
		return 1; /* a data structure loop has closed... */

	lv->status |= POSTORDER_OPEN_FLAG;
	r = _lv_postorder_visit(lv, baton->fn, baton->data);
	lv->status &= ~POSTORDER_OPEN_FLAG;
	lv->status |= POSTORDER_FLAG;

	return r;
}
/*
 * Call fn(dep, data) for every LV that lv directly refers to:
 *  - the replicator device's LV and slog, unless that is lv itself,
 *  - the snapshot origin and cow, if lv has a snapshot,
 *  - per segment: log_lv, rlog_lv and every area of type AREA_LV.
 *
 * Stops at the first callback that returns 0 and returns 0; returns 1
 * once every dependency has been processed.
 */
static int _lv_each_dependency(struct logical_volume *lv,
			       int (*fn)(struct logical_volume *lv, void *data),
			       void *data)
{
	size_t i;
	uint32_t s;
	struct lv_segment *lvseg;

	/* NULL slots mark dependencies that lv does not have. */
	struct logical_volume *deps[] = {
		(lv->rdevice && lv != lv->rdevice->lv) ? lv->rdevice->lv : NULL,
		(lv->rdevice && lv != lv->rdevice->slog) ? lv->rdevice->slog : NULL,
		lv->snapshot ? lv->snapshot->origin : NULL,
		lv->snapshot ? lv->snapshot->cow : NULL };

	for (i = 0; i < sizeof(deps) / sizeof(*deps); ++i) {
		if (deps[i] && !fn(deps[i], data))
			return_0;
	}

	dm_list_iterate_items(lvseg, &lv->segments) {
		if (lvseg->log_lv && !fn(lvseg->log_lv, data))
			return_0;
		if (lvseg->rlog_lv && !fn(lvseg->rlog_lv, data))
			return_0;
		for (s = 0; s < lvseg->area_count; ++s) {
			if (seg_type(lvseg, s) == AREA_LV && !fn(seg_lv(lvseg,s), data))
				return_0;
		}
	}

	return 1;
}
/*
 * Clear POSTORDER_FLAG on lv and, recursively, on its dependencies.
 *
 * An LV without the flag is left alone and its dependencies are not
 * walked.  Returns 0 only if the recursion reports a failure.
 */
static int _lv_postorder_cleanup(struct logical_volume *lv, void *data)
{
	if (lv->status & POSTORDER_FLAG) {
		lv->status &= ~POSTORDER_FLAG;

		if (!_lv_each_dependency(lv, _lv_postorder_cleanup, data))
			return_0;
	}

	return 1;
}
/*
 * Visit lv in postorder: process all of its dependencies first (through
 * _lv_postorder_level()), then call fn on lv itself.
 *
 * An LV already carrying POSTORDER_FLAG is skipped and counts as success.
 * If walking the dependencies fails, fn is not called on lv and 0 is
 * returned; otherwise the result of fn(lv, data) is returned.
 */
static int _lv_postorder_visit(struct logical_volume *lv,
			       int (*fn)(struct logical_volume *lv, void *data),
			       void *data)
{
	struct _lv_postorder_baton baton;

	if (lv->status & POSTORDER_FLAG)
		return 1;

	baton.fn = fn;
	baton.data = data;

	if (!_lv_each_dependency(lv, _lv_postorder_level, &baton))
		return 0;

	return fn(lv, data);
}
/*
 * Walk the LV dependency graph depth-first starting at lv and call the
 * callback "fn" on each LV in postorder.  The void *data is handed to
 * every call.  A callback may return zero to report an error, which
 * terminates the walk and becomes the return value of _lv_postorder.
 *
 * The marker flags set during the walk are cleared again afterwards,
 * whether or not the walk succeeded.
 */
static int _lv_postorder(struct logical_volume *lv,
			       int (*fn)(struct logical_volume *lv, void *data),
			       void *data)
{
	int result = _lv_postorder_visit(lv, fn, data);

	_lv_postorder_cleanup(lv, 0);

	return result;
}
/*
 * Accumulator handed to _lv_mark_if_partial_collect() while the
 * dependencies of one LV are examined.
 */
struct _lv_mark_if_partial_baton {
int partial; /* set to 1 once any examined LV has PARTIAL_LV set */
};
/*
 * Dependency callback: record in the baton (passed as data) that lv is
 * marked PARTIAL_LV.  Always returns 1 so the walk continues.
 */
static int _lv_mark_if_partial_collect(struct logical_volume *lv, void *data)
{
	struct _lv_mark_if_partial_baton *acc = data;

	if (!(lv->status & PARTIAL_LV))
		return 1;

	acc->partial = 1;

	return 1;
}
static int _lv_mark_if_partial_single(struct logical_volume *lv, void *data)
{
int s;
struct _lv_mark_if_partial_baton baton;
struct lv_segment *lvseg;
dm_list_iterate_items(lvseg, &lv->segments) {
for (s = 0; s < lvseg->area_count; ++s) {
if (seg_type(lvseg, s) == AREA_PV) {
2010-03-16 17:37:38 +03:00
if (is_missing_pv(seg_pv(lvseg, s)))
lv->status |= PARTIAL_LV;
}
}
}
baton.partial = 0;
_lv_each_dependency(lv, _lv_mark_if_partial_collect, &baton);
if (baton.partial)
lv->status |= PARTIAL_LV;
return 1;
}
/*
 * Run _lv_mark_if_partial_single() over lv and everything it depends on,
 * in postorder, and return the result of that walk.
 */
static int _lv_mark_if_partial(struct logical_volume *lv)
{
return _lv_postorder(lv, _lv_mark_if_partial_single, NULL);
}
/*
* Mark LVs with missing PVs using PARTIAL_LV status flag. The flag is
* propagated transitively, so LVs referencing other LVs are marked
* partial as well, if any of their referenced LVs are marked partial.
*/
int vg_mark_partial_lvs(struct volume_group *vg)
{
struct logical_volume *lv;
struct lv_list *lvl;
dm_list_iterate_items(lvl, &vg->lvs) {
lv = lvl->lv;
if (!_lv_mark_if_partial(lv))
return_0;
}
return 1;
}
/*
 * Be sure that all PV devices have cached read ahead in dev-cache
 * Currently it takes read_ahead from first PV segment only
 *
 * Postorder callback; data points to a uint32_t holding the largest read
 * ahead seen so far, which is raised if this LV's value is bigger.
 * Always returns 1.
 */
static int _lv_read_ahead_single(struct logical_volume *lv, void *data)
{
	uint32_t *max_read_ahead = data;
	uint32_t pv_read_ahead = 0;
	struct lv_segment *seg = first_seg(lv);

	if (seg && seg->area_count && seg_type(seg, 0) == AREA_PV)
		dev_get_read_ahead(seg_pv(seg, 0)->dev, &pv_read_ahead);

	if (*max_read_ahead < pv_read_ahead)
		*max_read_ahead = pv_read_ahead;

	return 1;
}
/*
 * Calculate readahead for logical volume from underlying PV devices.
 * If read_ahead is NULL, only ensure that readahead of PVs are preloaded
 * into PV struct device in dev cache.
 *
 * The PVs are only consulted when lv->read_ahead is DM_READ_AHEAD_AUTO;
 * otherwise the value stored into *read_ahead is 0.
 */
void lv_calculate_readahead(const struct logical_volume *lv, uint32_t *read_ahead)
{
	uint32_t calculated = 0;

	if (lv->read_ahead == DM_READ_AHEAD_AUTO)
		_lv_postorder((struct logical_volume *)lv, _lv_read_ahead_single, &calculated);

	if (!read_ahead)
		return;

	log_debug("Calculated readahead of LV %s is %u", lv->name, calculated);
	*read_ahead = calculated;
}
/*
 * Run internal consistency checks on the in-memory metadata of vg.
 *
 * Checked: the PV list (length against vg->pv_count, back pointers to
 * the VG, duplicate PV ids, VG name stored in each PV), PV segments, LV
 * segments, the LV counts, duplicate LV names and ids, and that PV/LV
 * limits are set for formats without FMT_UNLIMITED_VOLS.
 *
 * Every problem found is logged as an internal error.  Returns 1 if the
 * VG passed all checks, 0 otherwise.
 */
int vg_validate(struct volume_group *vg)
{
	struct pv_list *pvl, *pvl2;
	struct lv_list *lvl, *lvl2;
	char uuid[64] __attribute__((aligned(8)));
	int r = 1;
	uint32_t hidden_lv_count = 0, lv_count = 0, lv_visible_count = 0;
	uint32_t pv_count = 0;
	uint32_t num_snapshots = 0;
	uint32_t loop_counter1, loop_counter2;

	/* FIXME Also check there's no data/metadata overlap */
	dm_list_iterate_items(pvl, &vg->pvs) {
		if (++pv_count > vg->pv_count) {
			log_error(INTERNAL_ERROR "PV list corruption detected in VG %s.", vg->name);
			/* FIXME Dump list structure? */
			r = 0;
		}
		if (pvl->pv->vg != vg) {
			log_error(INTERNAL_ERROR "VG %s PV list entry points "
				  "to different VG %s", vg->name,
				  pvl->pv->vg ? pvl->pv->vg->name : "NULL");
			r = 0;
		}
	}

	/*
	 * Compare each PV with the ones before it in the list.  The loop
	 * counters bound both loops by the number of entries counted above
	 * in case the list is corrupt.
	 */
	loop_counter1 = 0;
	/* FIXME Use temp hash table instead? */
	dm_list_iterate_items(pvl, &vg->pvs) {
		if (++loop_counter1 > pv_count)
			break;

		/* The inner bound applies to each pass, so restart it here. */
		loop_counter2 = 0;
		dm_list_iterate_items(pvl2, &vg->pvs) {
			if (++loop_counter2 > pv_count)
				break;
			if (pvl == pvl2)
				break;
			if (id_equal(&pvl->pv->id,
				     &pvl2->pv->id)) {
				if (!id_write_format(&pvl->pv->id, uuid,
						     sizeof(uuid)))
					 stack;
				log_error(INTERNAL_ERROR "Duplicate PV id "
					  "%s detected for %s in %s.",
					  uuid, pv_dev_name(pvl->pv),
					  vg->name);
				r = 0;
			}
		}

		if (strcmp(pvl->pv->vg_name, vg->name)) {
			log_error(INTERNAL_ERROR "VG name for PV %s is corrupted.",
				  pv_dev_name(pvl->pv));
			r = 0;
		}
	}

	if (!check_pv_segments(vg)) {
		log_error(INTERNAL_ERROR "PV segments corrupted in %s.",
			  vg->name);
		r = 0;
	}

	/*
	 * Count all non-snapshot invisible LVs
	 */
	dm_list_iterate_items(lvl, &vg->lvs) {
		lv_count++;

		if (lv_is_cow(lvl->lv))
			num_snapshots++;

		if (lv_is_visible(lvl->lv))
			lv_visible_count++;

		if (!check_lv_segments(lvl->lv, 0)) {
			log_error(INTERNAL_ERROR "LV segments corrupted in %s.",
				  lvl->lv->name);
			r = 0;
		}

		if (lvl->lv->status & VISIBLE_LV)
			continue;

		/* snapshots */
		if (lv_is_cow(lvl->lv))
			continue;

		/*
		 * Skip origins, except virtual origins, which are always
		 * hidden and so are counted below.
		 */
		if (lv_is_origin(lvl->lv) && !lv_is_virtual_origin(lvl->lv))
			continue;

		/* count other non-snapshot invisible volumes */
		hidden_lv_count++;

		/*
		 *  FIXME: add check for unreferenced invisible LVs
		 *   - snapshot cow & origin
		 *   - mirror log & images
		 *   - mirror conversion volumes (_mimagetmp*)
		 */
	}

	/*
	 * all volumes = visible LVs + snapshot_cows + invisible LVs
	 */
	if (lv_count != lv_visible_count + num_snapshots + hidden_lv_count) {
		log_error(INTERNAL_ERROR "#LVs (%u) != #visible LVs (%"
			  PRIu32 ") + #snapshots (%" PRIu32 ") + #internal LVs (%u) in VG %s",
			  lv_count, lv_visible_count,
			  num_snapshots, hidden_lv_count, vg->name);
		r = 0;
	}

	/* Avoid endless loop if lv->segments list is corrupt */
	if (!r)
		return r;

	/* Compare each LV with the ones before it in the list. */
	loop_counter1 = 0;
	/* FIXME Use temp hash table instead? */
	dm_list_iterate_items(lvl, &vg->lvs) {
		if (++loop_counter1 > lv_count)
			break;

		/* The inner bound applies to each pass, so restart it here. */
		loop_counter2 = 0;
		dm_list_iterate_items(lvl2, &vg->lvs) {
			if (++loop_counter2 > lv_count)
				break;
			if (lvl == lvl2)
				break;
			if (!strcmp(lvl->lv->name, lvl2->lv->name)) {
				log_error(INTERNAL_ERROR "Duplicate LV name "
					  "%s detected in %s.", lvl->lv->name,
					  vg->name);
				r = 0;
			}
			if (id_equal(&lvl->lv->lvid.id[1],
				     &lvl2->lv->lvid.id[1])) {
				if (!id_write_format(&lvl->lv->lvid.id[1], uuid,
						     sizeof(uuid)))
					 stack;
				log_error(INTERNAL_ERROR "Duplicate LV id "
					  "%s detected for %s and %s in %s.",
					  uuid, lvl->lv->name, lvl2->lv->name,
					  vg->name);
				r = 0;
			}
		}

		if (!check_lv_segments(lvl->lv, 1)) {
			log_error(INTERNAL_ERROR "LV segments corrupted in %s.",
				  lvl->lv->name);
			r = 0;
		}
	}

	if (!(vg->fid->fmt->features & FMT_UNLIMITED_VOLS) &&
	    (!vg->max_lv || !vg->max_pv)) {
		log_error(INTERNAL_ERROR "Volume group %s has limited PV/LV count"
			  " but limit is not set.", vg->name);
		r = 0;
	}

	if (vg_max_lv_reached(vg))
		stack;

	return r;
}
/*
* After vg_write() returns success,
* caller MUST call either vg_commit() or vg_revert()
*/
int vg_write(struct volume_group *vg)
{
struct dm_list *mdah;
struct metadata_area *mda;
2008-01-30 16:19:47 +03:00
if (!vg_validate(vg))
return_0;
if (vg->status & PARTIAL_VG) {
log_error("Cannot update partial volume group %s.", vg->name);
return 0;
}
if (vg_missing_pv_count(vg) && !vg->cmd->handles_missing_pvs) {
log_error("Cannot update volume group %s while physical "
"volumes are missing.", vg->name);
return 0;
}
if (vg_has_unknown_segments(vg) && !vg->cmd->handles_unknown_segments) {
log_error("Cannot update volume group %s with unknown segments in it!",
vg->name);
return 0;
}
2010-06-30 23:28:35 +04:00
if ((vg->fid->fmt->features & FMT_MDAS) && !_vg_adjust_ignored_mdas(vg))
2010-06-30 17:51:11 +04:00
return_0;
2010-06-30 23:28:35 +04:00
if (!vg_mda_used_count(vg)) {
2002-11-18 17:04:08 +03:00
log_error("Aborting vg_write: No metadata areas to write to!");
return 0;
}
if (!drop_cached_metadata(vg)) {
log_error("Unable to drop cached metadata for VG %s.", vg->name);
return 0;
}
vg->seqno++;
/* Write to each copy of the metadata area */
dm_list_iterate_items(mda, &vg->fid->metadata_areas_in_use) {
2004-03-27 00:07:30 +03:00
if (!mda->ops->vg_write) {
log_error("Format does not support writing volume"
"group metadata areas");
/* Revert */
dm_list_uniterate(mdah, &vg->fid->metadata_areas_in_use, &mda->list) {
mda = dm_list_item(mdah, struct metadata_area);
if (mda->ops->vg_revert &&
!mda->ops->vg_revert(vg->fid, vg, mda)) {
stack;
}
}
return 0;
}
2002-11-18 17:04:08 +03:00
if (!mda->ops->vg_write(vg->fid, vg, mda)) {
stack;
/* Revert */
dm_list_uniterate(mdah, &vg->fid->metadata_areas_in_use, &mda->list) {
mda = dm_list_item(mdah, struct metadata_area);
2005-06-01 20:51:55 +04:00
if (mda->ops->vg_revert &&
!mda->ops->vg_revert(vg->fid, vg, mda)) {
stack;
}
}
return 0;
}
}
/* Now pre-commit each copy of the new metadata */
dm_list_iterate_items(mda, &vg->fid->metadata_areas_in_use) {
if (mda->ops->vg_precommit &&
!mda->ops->vg_precommit(vg->fid, vg, mda)) {
stack;
/* Revert */
dm_list_iterate_items(mda, &vg->fid->metadata_areas_in_use) {
if (mda->ops->vg_revert &&
!mda->ops->vg_revert(vg->fid, vg, mda)) {
stack;
}
}
return 0;
}
}
return 1;
}
static int _vg_commit_mdas(struct volume_group *vg)
{
Before committing each mda, arrange mdas so ignored mdas get committed first. Arrange mdas so mdas that are to be ignored come first. This is an optimization that ensures consistency on disk for the longest period of time. This was noted by agk in review of the v4 patchset of pvchange-based mda balance. Note the following example for an explanation of the background: Assume the initial state on disk is as follows: PV0 (v1, non-ignored) PV1 (v1, non-ignored) PV2 (v1, non-ignored) PV3 (v1, non-ignored) If we did not sort the list, we would have a commit sequence something like this: PV0 (v2, non-ignored) PV1 (v2, ignored) PV2 (v2, ignored) PV3 (v2, non-ignored) After the commit of PV0's mdas, we'd have an on-disk state like this: PV0 (v2, non-ignored) PV1 (v1, non-ignored) PV2 (v1, non-ignored) PV3 (v1, non-ignored) This is an inconsistent state of the disk. If the machine fails, the next time it was brought back up, the auto-correct mechanism in vg_read would update the metadata on PV1-PV3. However, if possible we try to avoid inconsistent on-disk states. Clearly, because we did not sort, we have a greater chance of on-disk inconsistency - from the time the commit of PV0 is complete until the time PV3 is complete. We could improve the amount of time the on-disk state is consistent by simply sorting the commit order as follows: PV1 (v2, ignored) PV2 (v2, ignored) PV0 (v2, non-ignored) PV3 (v2, non-ignored) Thus, after the first PV is committed (in this case PV1), on-disk we would have: PV0 (v1, non-ignored) PV1 (v2, ignored) PV2 (v1, non-ignored) PV3 (v1, non-ignored) This is clearly a consistent state. PV1 will be read but the mda will be ignored. All other PVs contain v1 metadata, and no auto-correct will be required. In fact, if we commit all PVs with ignored mdas first, we'll only have an inconsistent state when we start writing non-ignored PVs, and thus the chances we'll get an inconsistent state on disk is much less with the sorted method. 
Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-29 00:35:49 +04:00
struct metadata_area *mda, *tmda;
struct dm_list ignored;
int failed = 0;
int cache_updated = 0;
Before committing each mda, arrange mdas so ignored mdas get committed first. Arrange mdas so mdas that are to be ignored come first. This is an optimization that ensures consistency on disk for the longest period of time. This was noted by agk in review of the v4 patchset of pvchange-based mda balance. Note the following example for an explanation of the background: Assume the initial state on disk is as follows: PV0 (v1, non-ignored) PV1 (v1, non-ignored) PV2 (v1, non-ignored) PV3 (v1, non-ignored) If we did not sort the list, we would have a commit sequence something like this: PV0 (v2, non-ignored) PV1 (v2, ignored) PV2 (v2, ignored) PV3 (v2, non-ignored) After the commit of PV0's mdas, we'd have an on-disk state like this: PV0 (v2, non-ignored) PV1 (v1, non-ignored) PV2 (v1, non-ignored) PV3 (v1, non-ignored) This is an inconsistent state of the disk. If the machine fails, the next time it was brought back up, the auto-correct mechanism in vg_read would update the metadata on PV1-PV3. However, if possible we try to avoid inconsistent on-disk states. Clearly, because we did not sort, we have a greater chance of on-disk inconsistency - from the time the commit of PV0 is complete until the time PV3 is complete. We could improve the amount of time the on-disk state is consistent by simply sorting the commit order as follows: PV1 (v2, ignored) PV2 (v2, ignored) PV0 (v2, non-ignored) PV3 (v2, non-ignored) Thus, after the first PV is committed (in this case PV1), on-disk we would have: PV0 (v1, non-ignored) PV1 (v2, ignored) PV2 (v1, non-ignored) PV3 (v1, non-ignored) This is clearly a consistent state. PV1 will be read but the mda will be ignored. All other PVs contain v1 metadata, and no auto-correct will be required. In fact, if we commit all PVs with ignored mdas first, we'll only have an inconsistent state when we start writing non-ignored PVs, and thus the chances we'll get an inconsistent state on disk is much less with the sorted method. 
Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-29 00:35:49 +04:00
/* Rearrange the metadata_areas_in_use so ignored mdas come first. */
dm_list_init(&ignored);
dm_list_iterate_items_safe(mda, tmda, &vg->fid->metadata_areas_in_use)
Before committing each mda, arrange mdas so ignored mdas get committed first. Arrange mdas so mdas that are to be ignored come first. This is an optimization that ensures consistency on disk for the longest period of time. This was noted by agk in review of the v4 patchset of pvchange-based mda balance. Note the following example for an explanation of the background: Assume the initial state on disk is as follows: PV0 (v1, non-ignored) PV1 (v1, non-ignored) PV2 (v1, non-ignored) PV3 (v1, non-ignored) If we did not sort the list, we would have a commit sequence something like this: PV0 (v2, non-ignored) PV1 (v2, ignored) PV2 (v2, ignored) PV3 (v2, non-ignored) After the commit of PV0's mdas, we'd have an on-disk state like this: PV0 (v2, non-ignored) PV1 (v1, non-ignored) PV2 (v1, non-ignored) PV3 (v1, non-ignored) This is an inconsistent state of the disk. If the machine fails, the next time it was brought back up, the auto-correct mechanism in vg_read would update the metadata on PV1-PV3. However, if possible we try to avoid inconsistent on-disk states. Clearly, because we did not sort, we have a greater chance of on-disk inconsistency - from the time the commit of PV0 is complete until the time PV3 is complete. We could improve the amount of time the on-disk state is consistent by simply sorting the commit order as follows: PV1 (v2, ignored) PV2 (v2, ignored) PV0 (v2, non-ignored) PV3 (v2, non-ignored) Thus, after the first PV is committed (in this case PV1), on-disk we would have: PV0 (v1, non-ignored) PV1 (v2, ignored) PV2 (v1, non-ignored) PV3 (v1, non-ignored) This is clearly a consistent state. PV1 will be read but the mda will be ignored. All other PVs contain v1 metadata, and no auto-correct will be required. In fact, if we commit all PVs with ignored mdas first, we'll only have an inconsistent state when we start writing non-ignored PVs, and thus the chances we'll get an inconsistent state on disk is much less with the sorted method. 
Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-29 00:35:49 +04:00
if (mda_is_ignored(mda))
dm_list_move(&ignored, &mda->list);
dm_list_iterate_items_safe(mda, tmda, &ignored)
Before committing each mda, arrange mdas so ignored mdas get committed first. Arrange mdas so mdas that are to be ignored come first. This is an optimization that ensures consistency on disk for the longest period of time. This was noted by agk in review of the v4 patchset of pvchange-based mda balance. Note the following example for an explanation of the background: Assume the initial state on disk is as follows: PV0 (v1, non-ignored) PV1 (v1, non-ignored) PV2 (v1, non-ignored) PV3 (v1, non-ignored) If we did not sort the list, we would have a commit sequence something like this: PV0 (v2, non-ignored) PV1 (v2, ignored) PV2 (v2, ignored) PV3 (v2, non-ignored) After the commit of PV0's mdas, we'd have an on-disk state like this: PV0 (v2, non-ignored) PV1 (v1, non-ignored) PV2 (v1, non-ignored) PV3 (v1, non-ignored) This is an inconsistent state of the disk. If the machine fails, the next time it was brought back up, the auto-correct mechanism in vg_read would update the metadata on PV1-PV3. However, if possible we try to avoid inconsistent on-disk states. Clearly, because we did not sort, we have a greater chance of on-disk inconsistency - from the time the commit of PV0 is complete until the time PV3 is complete. We could improve the amount of time the on-disk state is consistent by simply sorting the commit order as follows: PV1 (v2, ignored) PV2 (v2, ignored) PV0 (v2, non-ignored) PV3 (v2, non-ignored) Thus, after the first PV is committed (in this case PV1), on-disk we would have: PV0 (v1, non-ignored) PV1 (v2, ignored) PV2 (v1, non-ignored) PV3 (v1, non-ignored) This is clearly a consistent state. PV1 will be read but the mda will be ignored. All other PVs contain v1 metadata, and no auto-correct will be required. In fact, if we commit all PVs with ignored mdas first, we'll only have an inconsistent state when we start writing non-ignored PVs, and thus the chances we'll get an inconsistent state on disk is much less with the sorted method. 
Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
2010-06-29 00:35:49 +04:00
dm_list_move(&vg->fid->metadata_areas_in_use, &mda->list);
/* Commit to each copy of the metadata area */
dm_list_iterate_items(mda, &vg->fid->metadata_areas_in_use) {
failed = 0;
2002-11-18 17:04:08 +03:00
if (mda->ops->vg_commit &&
!mda->ops->vg_commit(vg->fid, vg, mda)) {
stack;
failed = 1;
}
/* Update cache first time we succeed */
if (!failed && !cache_updated) {
2008-03-17 19:51:31 +03:00
lvmcache_update_vg(vg, 0);
cache_updated = 1;
}
}
return cache_updated;
}
/*
 * Commit pending changes.
 *
 * Requires the VG name to be locked.  Returns non-zero if at least one
 * metadata area commit succeeded (i.e. the new metadata is committed),
 * 0 otherwise.
 */
int vg_commit(struct volume_group *vg)
{
	int committed;

	if (!vgname_is_locked(vg->name)) {
		log_error(INTERNAL_ERROR "Attempt to write new VG metadata "
			  "without locking %s", vg->name);
		return 0;
	}

	committed = _vg_commit_mdas(vg);

	if (!committed) {
		/* If update failed, remove any cached precommitted metadata. */
		if (!drop_cached_metadata(vg))
			log_error("Attempt to drop cached metadata failed "
				  "after commit for VG %s.", vg->name);
		return committed;
	}

	/* Instruct remote nodes to upgrade cached metadata. */
	remote_commit_cached_metadata(vg);

	/*
	 * The volume_group structure could be reused later, so old_name
	 * must be cleared once the commit has succeeded.
	 */
	vg->old_name = NULL;

	return committed;
}
/*
 * Don't commit any pending changes: ask every in-use metadata area that
 * supports it to revert, then drop cached metadata locally and notify
 * remote nodes.  Always returns 1.
 */
int vg_revert(struct volume_group *vg)
{
	struct metadata_area *area;

	dm_list_iterate_items(area, &vg->fid->metadata_areas_in_use) {
		/* Reverting is optional for a metadata area. */
		if (!area->ops->vg_revert)
			continue;

		if (!area->ops->vg_revert(vg->fid, vg, area)) {
			stack;
		}
	}

	if (!drop_cached_metadata(vg))
		log_error("Attempt to drop cached metadata failed "
			  "after reverted update for VG %s.", vg->name);

	remote_revert_cached_metadata(vg);

	return 1;
}
2002-11-18 17:04:08 +03:00
/* Make orphan PVs look like a VG */
2008-02-06 18:47:28 +03:00
static struct volume_group *_vg_read_orphans(struct cmd_context *cmd,
const char *orphan_vgname)
2002-11-18 17:04:08 +03:00
{
struct lvmcache_vginfo *vginfo;
2005-06-01 20:51:55 +04:00
struct lvmcache_info *info;
2002-11-18 17:04:08 +03:00
struct pv_list *pvl;
struct volume_group *vg;
struct physical_volume *pv;
struct dm_pool *mem;
2002-11-18 17:04:08 +03:00
lvmcache_label_scan(cmd, 0);
2008-02-06 18:47:28 +03:00
if (!(vginfo = vginfo_from_vgname(orphan_vgname, NULL)))
2008-01-30 16:19:47 +03:00
return_NULL;
2002-11-18 17:04:08 +03:00
if (!(mem = dm_pool_create("vg_read orphan", VG_MEMPOOL_CHUNK)))
return_NULL;
if (!(vg = dm_pool_zalloc(mem, sizeof(*vg)))) {
2002-11-18 17:04:08 +03:00
log_error("vg allocation failed");
goto bad;
2002-11-18 17:04:08 +03:00
}
dm_list_init(&vg->pvs);
dm_list_init(&vg->lvs);
dm_list_init(&vg->tags);
dm_list_init(&vg->removed_pvs);
vg->vgmem = mem;
2002-11-18 17:04:08 +03:00
vg->cmd = cmd;
if (!(vg->name = dm_pool_strdup(mem, orphan_vgname))) {
2002-11-18 17:04:08 +03:00
log_error("vg name allocation failed");
goto bad;
2002-11-18 17:04:08 +03:00
}
2008-04-08 02:12:37 +04:00
/* create format instance with appropriate metadata area */
if (!(vg->fid = vginfo->fmt->ops->create_instance(vginfo->fmt,
orphan_vgname, NULL,
NULL))) {
log_error("Failed to create format instance");
goto bad;
2008-04-08 02:12:37 +04:00
}
dm_list_iterate_items(info, &vginfo->infos) {
if (!(pv = _pv_read(cmd, mem, dev_name(info->dev), NULL, NULL, 1, 0))) {
2002-11-18 17:04:08 +03:00
continue;
}
if (!(pvl = dm_pool_zalloc(mem, sizeof(*pvl)))) {
2002-11-18 17:04:08 +03:00
log_error("pv_list allocation failed");
goto bad;
2002-11-18 17:04:08 +03:00
}
pvl->pv = pv;
add_pvl_to_vgs(vg, pvl);
2002-11-18 17:04:08 +03:00
}
return vg;
bad:
dm_pool_destroy(mem);
return NULL;
2002-11-18 17:04:08 +03:00
}
/*
 * Append to all_pvs a copy (allocated from pvmem) of every PV in vg whose
 * device is not already present on all_pvs.
 * Returns 1 on success, 0 if a copy could not be allocated.
 */
static int _update_pv_list(struct dm_pool *pvmem, struct dm_list *all_pvs, struct volume_group *vg)
{
	struct pv_list *vg_pvl, *known;
	int already_listed;

	dm_list_iterate_items(vg_pvl, &vg->pvs) {
		already_listed = 0;

		dm_list_iterate_items(known, all_pvs) {
			if (vg_pvl->pv->dev == known->pv->dev) {
				already_listed = 1;
				break;
			}
		}

		if (already_listed)
			continue;

		/* PV is not on list so add it. */
		known = _copy_pvl(pvmem, vg_pvl);
		if (!known) {
			log_error("pv_list allocation for '%s' failed",
				  pv_dev_name(vg_pvl->pv));
			return 0;
		}
		dm_list_add(all_pvs, &known->list);
	}

	return 1;
}
int vg_missing_pv_count(const struct volume_group *vg)
{
int ret = 0;
struct pv_list *pvl;
dm_list_iterate_items(pvl, &vg->pvs) {
2010-03-16 17:37:38 +03:00
if (is_missing_pv(pvl->pv))
++ ret;
}
return ret;
}
static void check_reappeared_pv(struct volume_group *correct_vg,
struct physical_volume *pv)
{
struct pv_list *pvl;
dm_list_iterate_items(pvl, &correct_vg->pvs)
2010-03-16 17:37:38 +03:00
if (pv->dev == pvl->pv->dev && is_missing_pv(pvl->pv)) {
log_warn("Missing device %s reappeared, updating "
"metadata for VG %s to version %u.",
pv_dev_name(pvl->pv), pv_vg_name(pvl->pv),
correct_vg->seqno);
if (pvl->pv->pe_alloc_count == 0) {
pv->status &= ~MISSING_PV;
pvl->pv->status &= ~MISSING_PV;
} else
2010-06-23 01:10:53 +04:00
log_warn("Device still marked missing because of allocated data "
"on it, remove volumes and consider vgreduce --removemissing.");
}
}
/* Caller sets consistent to 1 if it's safe for vg_read_internal to correct
2002-11-18 17:04:08 +03:00
* inconsistent metadata on disk (i.e. the VG write lock is held).
* This guarantees only consistent metadata is returned.
2002-11-18 17:04:08 +03:00
* If consistent is 0, caller must check whether consistent == 1 on return
2008-01-30 17:00:02 +03:00
* and take appropriate action if it isn't (e.g. abort; get write lock
* and call vg_read_internal again).
*
* If precommitted is set, use precommitted metadata if present.
2008-06-06 15:12:50 +04:00
*
* Either of vgname or vgid may be NULL.
2002-11-18 17:04:08 +03:00
*/
static struct volume_group *_vg_read(struct cmd_context *cmd,
const char *vgname,
const char *vgid,
2008-03-17 19:51:31 +03:00
int *consistent, unsigned precommitted)
{
struct format_instance *fid;
const struct format_type *fmt;
struct volume_group *vg, *correct_vg = NULL;
2002-11-18 17:04:08 +03:00
struct metadata_area *mda;
struct lvmcache_info *info;
int inconsistent = 0;
int inconsistent_vgid = 0;
int inconsistent_pvs = 0;
int inconsistent_seqno = 0;
int inconsistent_mdas = 0;
2008-03-17 19:51:31 +03:00
unsigned use_precommitted = precommitted;
unsigned saved_handles_missing_pvs = cmd->handles_missing_pvs;
struct dm_list *pvids;
struct pv_list *pvl, *pvl2;
struct dm_list all_pvs;
char uuid[64] __attribute__((aligned(8)));
if (is_orphan_vg(vgname)) {
if (use_precommitted) {
log_error(INTERNAL_ERROR "vg_read_internal requires vgname "
"with pre-commit.");
return NULL;
}
2002-11-18 17:04:08 +03:00
*consistent = 1;
2008-02-06 18:47:28 +03:00
return _vg_read_orphans(cmd, vgname);
2002-11-18 17:04:08 +03:00
}
/*
* If cached metadata was inconsistent and *consistent is set
* then repair it now. Otherwise just return it.
* Also return if use_precommitted is set due to the FIXME in
* the missing PV logic below.
*/
if ((correct_vg = lvmcache_get_vg(vgid, precommitted)) &&
(use_precommitted || !*consistent || !(correct_vg->status & INCONSISTENT_VG))) {
if (!(correct_vg->status & INCONSISTENT_VG))
*consistent = 1;
else /* Inconsistent but we can't repair it */
correct_vg->status &= ~INCONSISTENT_VG;
if (vg_missing_pv_count(correct_vg)) {
log_verbose("There are %d physical volumes missing.",
vg_missing_pv_count(correct_vg));
vg_mark_partial_lvs(correct_vg);
}
return correct_vg;
} else {
vg_release(correct_vg);
correct_vg = NULL;
}
2002-11-18 17:04:08 +03:00
/* Find the vgname in the cache */
/* If it's not there we must do full scan to be completely sure */
if (!(fmt = fmt_from_vgname(vgname, vgid))) {
lvmcache_label_scan(cmd, 0);
if (!(fmt = fmt_from_vgname(vgname, vgid))) {
2008-01-30 16:19:47 +03:00
if (memlock())
return_NULL;
lvmcache_label_scan(cmd, 2);
2008-01-30 16:19:47 +03:00
if (!(fmt = fmt_from_vgname(vgname, vgid)))
return_NULL;
}
}
2008-06-06 15:12:50 +04:00
/* Now determine the correct vgname if none was supplied */
if (!vgname && !(vgname = vgname_from_vgid(cmd->mem, vgid)))
return_NULL;
if (use_precommitted && !(fmt->features & FMT_PRECOMMIT))
use_precommitted = 0;
2002-11-18 17:04:08 +03:00
/* create format instance with appropriate metadata area */
if (!(fid = fmt->ops->create_instance(fmt, vgname, vgid, NULL))) {
log_error("Failed to create format instance");
return NULL;
}
/* Store pvids for later so we can check if any are missing */
if (!(pvids = lvmcache_get_pvids(cmd, vgname, vgid)))
return_NULL;
/* Ensure contents of all metadata areas match - else do recovery */
dm_list_iterate_items(mda, &fid->metadata_areas_in_use) {
if ((use_precommitted &&
!(vg = mda->ops->vg_read_precommit(fid, vgname, mda))) ||
(!use_precommitted &&
!(vg = mda->ops->vg_read(fid, vgname, mda)))) {
2002-11-18 17:04:08 +03:00
inconsistent = 1;
vg_release(vg);
2002-11-18 17:04:08 +03:00
continue;
}
if (!correct_vg) {
correct_vg = vg;
continue;
}
2002-11-18 17:04:08 +03:00
/* FIXME Also ensure contents same - checksum compare? */
if (correct_vg->seqno != vg->seqno) {
if (cmd->metadata_read_only)
log_very_verbose("Not repairing VG %s metadata seqno (%d != %d) "
"as global/metadata_read_only is set.",
vgname, vg->seqno, correct_vg->seqno);
else {
inconsistent = 1;
inconsistent_seqno = 1;
}
if (vg->seqno > correct_vg->seqno) {
vg_release(correct_vg);
correct_vg = vg;
}
}
if (vg != correct_vg)
vg_release(vg);
}
/* Ensure every PV in the VG was in the cache */
if (correct_vg) {
/*
* If the VG has PVs without mdas, or ignored mdas, they may
* still be orphans in the cache: update the cache state here,
* and update the metadata lists in the vg.
*/
if (!inconsistent &&
dm_list_size(&correct_vg->pvs) > dm_list_size(pvids)) {
dm_list_iterate_items(pvl, &correct_vg->pvs) {
if (!pvl->pv->dev) {
inconsistent_pvs = 1;
break;
}
if (str_list_match_item(pvids, pvl->pv->dev->pvid))
continue;
/*
* PV not marked as belonging to this VG in cache.
* Check it's an orphan without metadata area
* not ignored.
*/
if (!(info = info_from_pvid(pvl->pv->dev->pvid, 1)) ||
!info->vginfo || !is_orphan_vg(info->vginfo->vgname)) {
inconsistent_pvs = 1;
break;
}
if (dm_list_size(&info->mdas)) {
if (!fid_add_mdas(fid, &info->mdas))
return_NULL;
log_debug("Empty mda found for VG %s.", vgname);
if (inconsistent_mdas)
continue;
/*
* If any newly-added mdas are in-use then their
* metadata needs updating.
*/
dm_list_iterate_items(mda, &info->mdas)
if (!mda_is_ignored(mda)) {
inconsistent_mdas = 1;
break;
}
}
}
/* If the check passed, let's update VG and recalculate pvids */
if (!inconsistent_pvs) {
log_debug("Updating cache for PVs without mdas "
"in VG %s.", vgname);
/*
* If there is no precommitted metadata, committed metadata
* is read and stored in the cache even if use_precommitted is set
*/
lvmcache_update_vg(correct_vg, correct_vg->status & PRECOMMITTED);
if (!(pvids = lvmcache_get_pvids(cmd, vgname, vgid)))
return_NULL;
}
}
if (dm_list_size(&correct_vg->pvs) !=
dm_list_size(pvids) + vg_missing_pv_count(correct_vg)) {
log_debug("Cached VG %s had incorrect PV list",
vgname);
if (memlock())
inconsistent = 1;
else {
vg_release(correct_vg);
correct_vg = NULL;
}
} else dm_list_iterate_items(pvl, &correct_vg->pvs) {
2010-03-16 17:37:38 +03:00
if (is_missing_pv(pvl->pv))
continue;
if (!str_list_match_item(pvids, pvl->pv->dev->pvid)) {
log_debug("Cached VG %s had incorrect PV list",
vgname);
vg_release(correct_vg);
correct_vg = NULL;
break;
}
}
if (correct_vg && inconsistent_mdas) {
vg_release(correct_vg);
correct_vg = NULL;
}
}
dm_list_init(&all_pvs);
/* Failed to find VG where we expected it - full scan and retry */
if (!correct_vg) {
inconsistent = 0;
2008-01-30 16:19:47 +03:00
if (memlock())
return_NULL;
lvmcache_label_scan(cmd, 2);
2008-01-30 16:19:47 +03:00
if (!(fmt = fmt_from_vgname(vgname, vgid)))
return_NULL;
if (precommitted && !(fmt->features & FMT_PRECOMMIT))
use_precommitted = 0;
/* create format instance with appropriate metadata area */
if (!(fid = fmt->ops->create_instance(fmt, vgname, vgid, NULL))) {
log_error("Failed to create format instance");
return NULL;
}
/* Ensure contents of all metadata areas match - else recover */
dm_list_iterate_items(mda, &fid->metadata_areas_in_use) {
if ((use_precommitted &&
!(vg = mda->ops->vg_read_precommit(fid, vgname,
mda))) ||
(!use_precommitted &&
!(vg = mda->ops->vg_read(fid, vgname, mda)))) {
inconsistent = 1;
continue;
}
if (!correct_vg) {
correct_vg = vg;
if (!_update_pv_list(cmd->mem, &all_pvs, correct_vg)) {
vg_release(vg);
return_NULL;
}
continue;
}
if (strncmp((char *)vg->id.uuid,
(char *)correct_vg->id.uuid, ID_LEN)) {
inconsistent = 1;
inconsistent_vgid = 1;
}
/* FIXME Also ensure contents same - checksums same? */
if (correct_vg->seqno != vg->seqno) {
/* Ignore inconsistent seqno if told to skip repair logic */
if (cmd->metadata_read_only)
log_very_verbose("Not repairing VG %s metadata seqno (%d != %d) "
"as global/metadata_read_only is set.",
vgname, vg->seqno, correct_vg->seqno);
else {
inconsistent = 1;
inconsistent_seqno = 1;
}
if (!_update_pv_list(cmd->mem, &all_pvs, vg)) {
vg_release(vg);
vg_release(correct_vg);
return_NULL;
}
if (vg->seqno > correct_vg->seqno) {
vg_release(correct_vg);
correct_vg = vg;
}
}
if (vg != correct_vg)
vg_release(vg);
}
/* Give up looking */
2008-01-30 16:19:47 +03:00
if (!correct_vg)
return_NULL;
}
/*
* If there is no precommitted metadata, committed metadata
* is read and stored in the cache even if use_precommitted is set
*/
lvmcache_update_vg(correct_vg, correct_vg->status & PRECOMMITTED &
(inconsistent ? INCONSISTENT_VG : 0));
2002-11-18 17:04:08 +03:00
if (inconsistent) {
/* FIXME Test should be if we're *using* precommitted metadata not if we were searching for it */
if (use_precommitted) {
log_error("Inconsistent pre-commit metadata copies "
"for volume group %s", vgname);
/* FIXME: during repair, there is inconsistent flag set because some metadata areas
* are missing (on missing PVs). Code should create list of missing PVs, compare it
* with PV marked missing in metadata and if equals, use it as consistent vg.
* For now, return precommited metadata if remainng seq match here to allow
* preloading table in suspend call.
*/
if (!inconsistent_seqno) {
*consistent = 0;
return correct_vg;
}
vg_release(correct_vg);
return NULL;
}
2002-11-18 17:04:08 +03:00
if (!*consistent)
return correct_vg;
/* Don't touch if vgids didn't match */
if (inconsistent_vgid) {
log_error("Inconsistent metadata UUIDs found for "
"volume group %s", vgname);
*consistent = 0;
return correct_vg;
}
log_warn("WARNING: Inconsistent metadata found for VG %s - updating "
"to use version %u", vgname, correct_vg->seqno);
/*
* If PV is marked missing but we found it,
* update metadata and remove MISSING flag
*/
dm_list_iterate_items(pvl, &all_pvs)
check_reappeared_pv(correct_vg, pvl->pv);
cmd->handles_missing_pvs = 1;
if (!vg_write(correct_vg)) {
log_error("Automatic metadata correction failed");
vg_release(correct_vg);
cmd->handles_missing_pvs = saved_handles_missing_pvs;
return NULL;
}
cmd->handles_missing_pvs = saved_handles_missing_pvs;
2005-01-17 21:24:28 +03:00
if (!vg_commit(correct_vg)) {
log_error("Automatic metadata correction commit "
"failed");
vg_release(correct_vg);
2005-01-17 21:24:28 +03:00
return NULL;
}
dm_list_iterate_items(pvl, &all_pvs) {
dm_list_iterate_items(pvl2, &correct_vg->pvs) {
if (pvl->pv->dev == pvl2->pv->dev)
goto next_pv;
}
if (!id_write_format(&pvl->pv->id, uuid, sizeof(uuid))) {
vg_release(correct_vg);
return_NULL;
}
log_error("Removing PV %s (%s) that no longer belongs to VG %s",
pv_dev_name(pvl->pv), uuid, correct_vg->name);
if (!pv_write_orphan(cmd, pvl->pv)) {
vg_release(correct_vg);
return_NULL;
}
/* Refresh metadata after orphan write */
drop_cached_metadata(correct_vg);
next_pv:
;
}
}
if (vg_missing_pv_count(correct_vg)) {
log_verbose("There are %d physical volumes missing.",
vg_missing_pv_count(correct_vg));
vg_mark_partial_lvs(correct_vg);
}
2003-05-06 16:06:02 +04:00
if ((correct_vg->status & PVMOVE) && !pvmove_mode()) {
2003-04-30 19:23:43 +04:00
log_error("WARNING: Interrupted pvmove detected in "
"volume group %s", correct_vg->name);
2003-04-30 19:23:43 +04:00
log_error("Please restore the metadata by running "
"vgcfgrestore.");
vg_release(correct_vg);
2003-04-30 19:23:43 +04:00
return NULL;
}
2003-04-30 19:23:43 +04:00
*consistent = 1;
return correct_vg;
}
/*
 * Read a VG via _vg_read (committed metadata only) and validate its PV and
 * LV segments before returning it.
 * Returns NULL, releasing the VG, if the read or any check fails.
 */
struct volume_group *vg_read_internal(struct cmd_context *cmd, const char *vgname,
				      const char *vgid, int *consistent)
{
	struct volume_group *vg;
	struct lv_list *lvl;
	int pass;

	vg = _vg_read(cmd, vgname, vgid, consistent, 0);
	if (!vg)
		return NULL;

	if (!check_pv_segments(vg)) {
		log_error(INTERNAL_ERROR "PV segments corrupted in %s.",
			  vg->name);
		goto bad;
	}

	/*
	 * Two complete sweeps over the LVs: pass 0 first for every LV,
	 * then pass 1 (checks that cross-reference other LVs).
	 */
	for (pass = 0; pass <= 1; pass++) {
		dm_list_iterate_items(lvl, &vg->lvs) {
			if (check_lv_segments(lvl->lv, pass))
				continue;

			log_error(INTERNAL_ERROR "LV segments corrupted in %s.",
				  lvl->lv->name);
			goto bad;
		}
	}

	return vg;

bad:
	vg_release(vg);
	return NULL;
}
/*
 * Release a VG by destroying the memory pool it was allocated from.
 *
 * Safe to call with NULL or with a VG that has no pool.
 * If the VG turns out to live in the command-wide pool (cmd->mem) this is
 * reported as an internal error and the pool is left alone: destroying it
 * would invalidate every other allocation made from cmd->mem.
 */
void vg_release(struct volume_group *vg)
{
	if (!vg || !vg->vgmem)
		return;

	if (vg->cmd && vg->vgmem == vg->cmd->mem) {
		log_error(INTERNAL_ERROR "global memory pool used for VG %s",
			  vg->name);
		return;
	}

	dm_pool_destroy(vg->vgmem);
}
2008-01-30 17:00:02 +03:00
/* This is only called by lv_from_lvid, which is only called from
* activate.c so we know the appropriate VG lock is already held and
* the vg_read_internal is therefore safe.
2002-11-18 17:04:08 +03:00
*/
static struct volume_group *_vg_read_by_vgid(struct cmd_context *cmd,
const char *vgid,
2008-03-17 19:51:31 +03:00
unsigned precommitted)
{
2003-10-16 00:10:11 +04:00
const char *vgname;
struct dm_list *vgnames;
struct volume_group *vg = NULL;
struct lvmcache_vginfo *vginfo;
2005-06-01 20:51:55 +04:00
struct str_list *strl;
2002-11-18 17:04:08 +03:00
int consistent = 0;
/* Is corresponding vgname already cached? */
if ((vginfo = vginfo_from_vgid(vgid)) &&
vginfo->vgname && !is_orphan_vg(vginfo->vgname)) {
2008-06-06 15:12:50 +04:00
if ((vg = _vg_read(cmd, NULL, vgid,
&consistent, precommitted)) &&
!strncmp((char *)vg->id.uuid, vgid, ID_LEN)) {
2002-11-18 17:04:08 +03:00
if (!consistent) {
log_error("Volume group %s metadata is "
"inconsistent", vg->name);
2002-11-18 17:04:08 +03:00
}
return vg;
}
vg_release(vg);
2002-11-18 17:04:08 +03:00
}
2003-10-16 00:10:11 +04:00
/* Mustn't scan if memory locked: ensure cache gets pre-populated! */
if (memlock())
goto out;
/* FIXME Need a genuine read by ID here - don't vg_read_internal by name! */
/* FIXME Disabled vgrenames while active for now because we aren't
* allowed to do a full scan here any more. */
2008-01-30 17:00:02 +03:00
// The slow way - full scan required to cope with vgrename
lvmcache_label_scan(cmd, 2);
if (!(vgnames = get_vgnames(cmd, 0))) {
log_error("vg_read_by_vgid: get_vgnames failed");
goto out;
}
dm_list_iterate_items(strl, vgnames) {
2005-06-01 20:51:55 +04:00
vgname = strl->str;
if (!vgname)
2008-01-30 17:00:02 +03:00
continue; // FIXME Unnecessary?
2002-11-18 17:04:08 +03:00
consistent = 0;
if ((vg = _vg_read(cmd, vgname, vgid, &consistent,
precommitted)) &&
!strncmp((char *)vg->id.uuid, vgid, ID_LEN)) {
2002-11-18 17:04:08 +03:00
if (!consistent) {
log_error("Volume group %s metadata is "
"inconsistent", vgname);
goto out;
2002-11-18 17:04:08 +03:00
}
return vg;
}
}
out:
vg_release(vg);
return NULL;
}
2002-11-18 17:04:08 +03:00
/* Only called by activate.c */
struct logical_volume *lv_from_lvid(struct cmd_context *cmd, const char *lvid_s,
2008-03-17 19:51:31 +03:00
unsigned precommitted)
2002-11-18 17:04:08 +03:00
{
struct lv_list *lvl;
struct volume_group *vg;
const union lvid *lvid;
2002-11-18 17:04:08 +03:00
lvid = (const union lvid *) lvid_s;
2002-11-18 17:04:08 +03:00
log_very_verbose("Finding volume group for uuid %s", lvid_s);
if (!(vg = _vg_read_by_vgid(cmd, (char *)lvid->id[0].uuid, precommitted))) {
2002-11-18 17:04:08 +03:00
log_error("Volume group for uuid not found: %s", lvid_s);
return NULL;
}
log_verbose("Found volume group \"%s\"", vg->name);
if (vg->status & EXPORTED_VG) {
log_error("Volume group \"%s\" is exported", vg->name);
goto out;
2002-11-18 17:04:08 +03:00
}
if (!(lvl = find_lv_in_vg_by_lvid(vg, lvid))) {
log_very_verbose("Can't find logical volume id %s", lvid_s);
goto out;
2002-11-18 17:04:08 +03:00
}
return lvl->lv;
out:
vg_release(vg);
return NULL;
2002-11-18 17:04:08 +03:00
}
/*
 * Return the VG name lvmcache associates with pvid.
 *
 * When the cache reports an orphan whose metadata areas are empty or
 * ignored, all VGs are rescanned and the cache is asked again, since such
 * a PV may really belong to a VG described on another PV.
 * Returns NULL if the PV is not in the cache or the rescan fails.
 */
const char *find_vgname_from_pvid(struct cmd_context *cmd,
				  const char *pvid)
{
	char *vgname;
	struct lvmcache_info *info;

	vgname = lvmcache_vgname_from_pvid(cmd, pvid);
	if (!is_orphan_vg(vgname))
		return vgname;

	info = info_from_pvid(pvid, 0);
	if (!info)
		return_NULL;

	/*
	 * If an orphan PV has no MDAs, or it has MDAs but the
	 * MDA is ignored, it may appear to be an orphan until
	 * the metadata is read off another PV in the same VG.
	 * Detecting this means checking every VG by scanning
	 * every PV on the system.
	 */
	if (!mdas_empty_or_ignored(&info->mdas))
		return vgname;

	if (!scan_vgs_for_pvs(cmd)) {
		log_error("Rescan for PVs without "
			  "metadata areas failed.");
		return NULL;
	}

	/* Ask lvmcache again - we may have a non-orphan name now */
	vgname = lvmcache_vgname_from_pvid(cmd, pvid);

	return vgname;
}
/*
 * Return the VG name for the PV on device pvname, or NULL if no PV id can
 * be obtained for that device (i.e. it is not a PV).
 */
const char *find_vgname_from_pvname(struct cmd_context *cmd,
				    const char *pvname)
{
	const char *pvid = pvid_from_devname(cmd, pvname);

	/* A NULL pvid means: not a PV */
	return pvid ? find_vgname_from_pvid(cmd, pvid) : NULL;
}
/**
* pv_read - read and return a handle to a physical volume
* @cmd: LVM command initiating the pv_read
* @pv_name: full device name of the PV, including the path
* @mdas: list of metadata areas of the PV
* @label_sector: sector number where the PV label is stored on @pv_name
* @warnings:
*
* Returns:
* PV handle - valid pv_name and successful read of the PV, or
* NULL - invalid parameter or error in reading the PV
*
* Note:
* FIXME - liblvm todo - make into function that returns handle
*/
2002-11-18 17:04:08 +03:00
struct physical_volume *pv_read(struct cmd_context *cmd, const char *pv_name,
struct dm_list *mdas, uint64_t *label_sector,
int warnings, int scan_label_only)
{
return _pv_read(cmd, cmd->mem, pv_name, mdas, label_sector, warnings, scan_label_only);
}
/* FIXME Use label functions instead of PV functions */
2008-01-30 17:00:02 +03:00
static struct physical_volume *_pv_read(struct cmd_context *cmd,
struct dm_pool *pvmem,
const char *pv_name,
struct dm_list *mdas,
uint64_t *label_sector,
int warnings, int scan_label_only)
{
struct physical_volume *pv;
2002-11-18 17:04:08 +03:00
struct label *label;
struct lvmcache_info *info;
2002-11-18 17:04:08 +03:00
struct device *dev;
2008-01-30 16:19:47 +03:00
if (!(dev = dev_cache_get(pv_name, cmd->filter)))
return_NULL;
if (!(label_read(dev, &label, UINT64_C(0)))) {
if (warnings)
log_error("No physical volume label read from %s",
pv_name);
2005-04-20 00:52:35 +04:00
return NULL;
}
info = (struct lvmcache_info *) label->info;
2002-11-18 17:04:08 +03:00
if (label_sector && *label_sector)
*label_sector = label->sector;
pv = _alloc_pv(pvmem, dev);
if (!pv) {
log_error("pv allocation for '%s' failed", pv_name);
2005-04-20 00:52:35 +04:00
return NULL;
2002-11-18 17:04:08 +03:00
}
/* FIXME Move more common code up here */
if (!(info->fmt->ops->pv_read(info->fmt, pv_name, pv, mdas,
scan_label_only))) {
log_error("Failed to read existing physical volume '%s'",
pv_name);
goto bad;
}
if (!pv->size)
goto bad;
if (!alloc_pv_segment_whole_pv(pvmem, pv))
goto_bad;
2005-04-20 00:52:35 +04:00
return pv;
bad:
_free_pv(pvmem, pv);
return NULL;
}
2002-11-18 17:04:08 +03:00
/*
 * Return the list of VG names known to lvmcache.
 * May return empty list.
 */
struct dm_list *get_vgnames(struct cmd_context *cmd, int include_internal)
{
	struct dm_list *names = lvmcache_get_vgnames(cmd, include_internal);

	return names;
}
/* Return the list of VG ids known to lvmcache. */
struct dm_list *get_vgids(struct cmd_context *cmd, int include_internal)
{
	struct dm_list *ids = lvmcache_get_vgids(cmd, include_internal);

	return ids;
}
/*
 * Read every VG known to the cache (with pvmove mode forced on for the
 * duration) and, if pvslist is non-NULL, collect a copy of every PV found
 * into a new list allocated from cmd->mem and store it in *pvslist.
 * With pvslist == NULL the VGs are only read, for the side effect of
 * refreshing the cache.
 *
 * VGs that cannot be read are skipped; inconsistent ones only produce a
 * warning.  The caller's pvmove mode is restored on every path that
 * changed it, including the allocation-failure return inside the loop.
 *
 * Returns 1 on success, 0 on failure.
 */
static int _get_pvs(struct cmd_context *cmd, struct dm_list **pvslist)
{
	struct str_list *strl;
	struct dm_list * uninitialized_var(results);
	const char *vgname, *vgid;
	struct pv_list *pvl, *pvl_copy;
	struct dm_list *vgids;
	struct volume_group *vg;
	int consistent = 0;
	int old_pvmove;

	lvmcache_label_scan(cmd, 0);

	if (pvslist) {
		if (!(results = dm_pool_alloc(cmd->mem, sizeof(*results)))) {
			log_error("PV list allocation failed");
			return 0;
		}

		dm_list_init(results);
	}

	/* Get list of VGs */
	if (!(vgids = get_vgids(cmd, 1))) {
		log_error("get_pvs: get_vgids failed");
		return 0;
	}

	/* Read every VG to ensure cache consistency */
	/* Orphan VG is last on list */
	old_pvmove = pvmove_mode();
	init_pvmove(1);
	dm_list_iterate_items(strl, vgids) {
		vgid = strl->str;
		if (!vgid)
			continue;	/* FIXME Unnecessary? */
		consistent = 0;
		if (!(vgname = vgname_from_vgid(NULL, vgid))) {
			stack;
			continue;
		}
		if (!(vg = vg_read_internal(cmd, vgname, vgid, &consistent))) {
			stack;
			continue;
		}
		if (!consistent)
			log_warn("WARNING: Volume Group %s is not consistent",
				 vgname);

		/* Move PVs onto results list */
		if (pvslist)
			dm_list_iterate_items(pvl, &vg->pvs) {
				if (!(pvl_copy = _copy_pvl(cmd->mem, pvl))) {
					log_error("PV list allocation failed");
					vg_release(vg);
					/* Don't leave pvmove mode forced on. */
					init_pvmove(old_pvmove);
					return 0;
				}
				dm_list_add(results, &pvl_copy->list);
			}
		vg_release(vg);
	}
	init_pvmove(old_pvmove);

	if (pvslist)
		*pvslist = results;
	else
		/* Nobody wants the result: hand the id list back to the pool. */
		dm_pool_free(cmd->mem, vgids);

	return 1;
}
/*
 * Return the list built by _get_pvs(), or NULL if _get_pvs() fails.
 */
struct dm_list *get_pvs(struct cmd_context *cmd)
{
	struct dm_list *pvs;

	return _get_pvs(cmd, &pvs) ? pvs : NULL;
}
/*
 * Run _get_pvs() without asking for a result list: the VGs are still read,
 * but no PV entries are copied.  Returns _get_pvs()'s status unchanged.
 */
int scan_vgs_for_pvs(struct cmd_context *cmd)
{
	int r = _get_pvs(cmd, NULL);

	return r;
}
int pv_write(struct cmd_context *cmd __attribute__((unused)),
struct physical_volume *pv,
struct dm_list *mdas, int64_t label_sector)
{
if (!pv->fmt->ops->pv_write) {
log_error("Format does not support writing physical volumes");
return 0;
}
if (!is_orphan_vg(pv->vg_name) || pv->pe_alloc_count) {
2002-11-18 17:04:08 +03:00
log_error("Assertion failed: can't _pv_write non-orphan PV "
"(in VG %s)", pv->vg_name);
return 0;
}
2008-01-30 16:19:47 +03:00
if (!pv->fmt->ops->pv_write(pv->fmt, pv, mdas, label_sector))
return_0;
return 1;
}
int pv_write_orphan(struct cmd_context *cmd, struct physical_volume *pv)
{
const char *old_vg_name = pv->vg_name;
2008-02-06 18:47:28 +03:00
pv->vg_name = cmd->fmt->orphan_vg_name;
pv->status = ALLOCATABLE_PV;
pv->pe_alloc_count = 0;
if (!dev_get_size(pv->dev, &pv->size)) {
log_error("%s: Couldn't get size.", pv_dev_name(pv));
return 0;
}
2009-08-10 21:15:01 +04:00
if (!pv_write(cmd, pv, NULL, INT64_C(-1))) {
log_error("Failed to clear metadata from physical "
"volume \"%s\" after removal from \"%s\"",
pv_dev_name(pv), old_vg_name);
return 0;
}
return 1;
}
/*
 * Returns 1 if vg_name is non-NULL and equal to VG_GLOBAL, otherwise 0.
 */
int is_global_vg(const char *vg_name)
{
	if (!vg_name)
		return 0;

	return strcmp(vg_name, VG_GLOBAL) == 0;
}
/**
 * is_orphan_vg - Determine whether a vg_name is an orphan
 * @vg_name: pointer to the vg_name (may be NULL)
 *
 * Returns 1 if vg_name is non-NULL and begins with ORPHAN_PREFIX,
 * otherwise 0.
 */
int is_orphan_vg(const char *vg_name)
{
	if (!vg_name)
		return 0;

	/* sizeof() counts the terminating NUL, hence the -1. */
	return strncmp(vg_name, ORPHAN_PREFIX, sizeof(ORPHAN_PREFIX) - 1) == 0;
}
/*
 * Look up the device pv_name, read its label at label_sector, report the
 * label and then invoke pv_analyze_mda for every metadata area listed in
 * the label's cache info.
 *
 * Returns:
 *  0 - fail
 *  1 - success
 */
int pv_analyze(struct cmd_context *cmd, const char *pv_name,
	       uint64_t label_sector)
{
	struct label *label;
	struct metadata_area *mda;
	struct lvmcache_info *cache_info;
	struct device *dev = dev_cache_get(pv_name, cmd->filter);

	if (!dev) {
		log_error("Device %s not found (or ignored by filtering).",
			  pv_name);
		return 0;
	}

	/* Step 1: the device must carry an LVM label. */
	if (!label_read(dev, &label, label_sector)) {
		log_error("Could not find LVM label on %s",
			  pv_name);
		return 0;
	}

	log_print("Found label on %s, sector %"PRIu64", type=%s",
		  pv_name, label->sector, label->type);

	/* Step 2: walk the metadata areas attached to the label. */
	cache_info = label->info;
	dm_list_iterate_items(mda, &cache_info->mdas)
		mda->ops->pv_analyze_mda(cache_info->fmt, mda);

	return 1;
}
/*
 * Returns 1 if the VG was opened with mode 'w'; otherwise logs an EPERM
 * error and returns 0.
 *
 * FIXME: remove / combine this with locking?
 */
int vg_check_write_mode(struct volume_group *vg)
{
	if (vg->open_mode == 'w')
		return 1;

	log_errno(EPERM, "Attempt to modify a read-only VG");
	return 0;
}
/*
 * Check a VG against each property requested in 'status' and return the
 * FAILED_* bits for the ones it does not satisfy (0 if all are fine).
 * An error is logged for each failing property.
 *
 * A failed CLUSTERED check returns FAILED_CLUSTERED immediately, without
 * evaluating the remaining properties.
 *
 * FIXME Remove the unnecessary duplicate definitions and return bits directly.
 */
static uint32_t _vg_bad_status_bits(const struct volume_group *vg,
				    uint64_t status)
{
	uint32_t bad = 0;

	if ((status & CLUSTERED) &&
	    (vg_is_clustered(vg)) && !locking_is_clustered()) {
		log_error("Skipping clustered volume group %s", vg->name);
		/* Return because other flags are considered undefined. */
		return FAILED_CLUSTERED;
	}

	if (status & EXPORTED_VG) {
		if (vg_is_exported(vg)) {
			log_error("Volume group %s is exported", vg->name);
			bad |= FAILED_EXPORTED;
		}
	}

	if (status & LVM_WRITE) {
		if (!(vg->status & LVM_WRITE)) {
			log_error("Volume group %s is read-only", vg->name);
			bad |= FAILED_READ_ONLY;
		}
	}

	if (status & RESIZEABLE_VG) {
		if (!vg_is_resizeable(vg)) {
			log_error("Volume group %s is not resizeable.", vg->name);
			bad |= FAILED_RESIZEABLE;
		}
	}

	return bad;
}
/**
 * vg_check_status - check volume group status flags and log error
 * @vg - volume group to check status flags
 * @status - specific status flags to check (e.g. EXPORTED_VG)
 *
 * Returns 1 if none of the requested checks failed, 0 otherwise.
 */
int vg_check_status(const struct volume_group *vg, uint64_t status)
{
	return _vg_bad_status_bits(vg, status) ? 0 : 1;
}
/*
 * Drop the current lock on vg_name, close all devices, take the lock again
 * as LCK_VG_WRITE and re-read the VG while asking for consistent metadata.
 *
 * Returns the VG if the read succeeds and reports it consistent; NULL
 * otherwise (an inconsistent VG is released before returning).
 */
static struct volume_group *_recover_vg(struct cmd_context *cmd,
			 const char *vg_name, const char *vgid)
{
	struct volume_group *vg;
	int consistent = 1;

	unlock_vg(cmd, vg_name);

	dev_close_all();

	if (!lock_vol(cmd, vg_name, LCK_VG_WRITE))
		return_NULL;

	if (!(vg = vg_read_internal(cmd, vg_name, vgid, &consistent)))
		return_NULL;

	if (consistent)
		return vg;

	vg_release(vg);
	return_NULL;
}
/*
* Consolidated locking, reading, and status flag checking.
*
* If the metadata is inconsistent, setting READ_ALLOW_INCONSISTENT in
* misc_flags will return it with FAILED_INCONSISTENT set instead of
* giving you nothing.
*
* Use vg_read_error(vg) to determine the result. Nonzero means there were
* problems reading the volume group.
* Zero value means that the VG is open and appropriate locks are held.
*/
static struct volume_group *_vg_lock_and_read(struct cmd_context *cmd, const char *vg_name,
const char *vgid, uint32_t lock_flags,
uint64_t status_flags, uint32_t misc_flags)
{
struct volume_group *vg = NULL;
int consistent = 1;
int consistent_in;
uint32_t failure = 0;
int already_locked;
if (misc_flags & READ_ALLOW_INCONSISTENT || lock_flags != LCK_VG_WRITE)
consistent = 0;
if (!validate_name(vg_name) && !is_orphan_vg(vg_name)) {
log_error("Volume group name %s has invalid characters",
vg_name);
return NULL;
}
already_locked = vgname_is_locked(vg_name);
2009-07-15 09:50:22 +04:00
if (!already_locked && !(misc_flags & READ_WITHOUT_LOCK) &&
!lock_vol(cmd, vg_name, lock_flags)) {
log_error("Can't get lock for %s", vg_name);
return _vg_make_handle(cmd, vg, FAILED_LOCKING);
}
if (is_orphan_vg(vg_name))
status_flags &= ~LVM_WRITE;
consistent_in = consistent;
/* If consistent == 1, we get NULL here if correction fails. */
if (!(vg = vg_read_internal(cmd, vg_name, vgid, &consistent))) {
if (consistent_in && !consistent) {
log_error("Volume group \"%s\" inconsistent.", vg_name);
failure |= FAILED_INCONSISTENT;
goto_bad;
}
log_error("Volume group \"%s\" not found", vg_name);
failure |= FAILED_NOTFOUND;
goto_bad;
}
if (vg_is_clustered(vg) && !locking_is_clustered()) {
log_error("Skipping clustered volume group %s", vg->name);
failure |= FAILED_CLUSTERED;
goto_bad;
}
/* consistent == 0 when VG is not found, but failed == FAILED_NOTFOUND */
if (!consistent && !failure) {
vg_release(vg);
if (!(vg = _recover_vg(cmd, vg_name, vgid))) {
log_error("Recovery of volume group \"%s\" failed.",
vg_name);
failure |= FAILED_INCONSISTENT;
goto_bad;
}
}
/*
* Check that the tool can handle tricky cases -- missing PVs and
* unknown segment types.
*/
if (!cmd->handles_missing_pvs && vg_missing_pv_count(vg) &&
lock_flags == LCK_VG_WRITE) {
log_error("Cannot change VG %s while PVs are missing.", vg->name);
log_error("Consider vgreduce --removemissing.");
failure |= FAILED_INCONSISTENT; /* FIXME new failure code here? */
goto_bad;
}
if (!cmd->handles_unknown_segments && vg_has_unknown_segments(vg) &&
lock_flags == LCK_VG_WRITE) {
log_error("Cannot change VG %s with unknown segments in it!",
vg->name);
failure |= FAILED_INCONSISTENT; /* FIXME new failure code here? */
goto_bad;
}
failure |= _vg_bad_status_bits(vg, status_flags);
if (failure)
goto_bad;
return _vg_make_handle(cmd, vg, failure);
bad:
2009-07-15 09:50:22 +04:00
if (!already_locked && !(misc_flags & READ_WITHOUT_LOCK))
unlock_vg(cmd, vg_name);
return _vg_make_handle(cmd, vg, failure);
}
/*
 * vg_read: High-level volume group metadata read function.
 *
 * vg_read_error() must be used on any handle returned to check for errors.
 * Failure bits that can be reported through this path include:
 *
 *  - metadata inconsistent and automatic correction failed: FAILED_INCONSISTENT
 *  - VG not found: FAILED_NOTFOUND
 *  - VG is clustered but locking is not: FAILED_CLUSTERED
 *  - locking failed: FAILED_LOCKING
 *  - with READ_FOR_UPDATE, VG is read-only: FAILED_READ_ONLY
 *  - with READ_FOR_UPDATE, VG is exported and flags lacks
 *    READ_ALLOW_EXPORTED: FAILED_EXPORTED
 *
 * On failure the lock is released, unless it was not taken by this call:
 *  - vgname_is_locked(vg_name) was already true, or
 *  - flags contains READ_WITHOUT_LOCK.
 * FIXME: remove these exceptions if possible and make an error always
 * release the lock.
 *
 * Volume groups are read under LCK_VG_READ unless flags contains
 * READ_FOR_UPDATE, in which case LCK_VG_WRITE is used.
 *
 * FIXME: We want vg_read to attempt automatic recovery after acquiring a
 * temporary write lock: if that fails, we bail out as usual, with failed &
 * FAILED_INCONSISTENT. If it works, we are good to go. Code that's been in
 * toollib just set lock_flags to LCK_VG_WRITE and called vg_read_internal with
 * *consistent = 1.
 */
struct volume_group *vg_read(struct cmd_context *cmd, const char *vg_name,
	      const char *vgid, uint32_t flags)
{
	uint32_t lock_flags;
	uint64_t status;

	if (flags & READ_FOR_UPDATE) {
		lock_flags = LCK_VG_WRITE;
		status = UINT64_C(0);
		status |= EXPORTED_VG | LVM_WRITE;
	} else {
		lock_flags = LCK_VG_READ;
		status = UINT64_C(0);
	}

	/* Caller accepts exported VGs: drop that check again. */
	if (flags & READ_ALLOW_EXPORTED)
		status &= ~EXPORTED_VG;

	return _vg_lock_and_read(cmd, vg_name, vgid, lock_flags, status, flags);
}
/*
 * A high-level volume group metadata reading function. Open a volume group for
 * later update (this means the user code can change the metadata and later
 * request the new metadata to be written and committed).
 *
 * Equivalent to vg_read() with READ_FOR_UPDATE added to flags.
 */
struct volume_group *vg_read_for_update(struct cmd_context *cmd, const char *vg_name,
			 const char *vgid, uint32_t flags)
{
	uint32_t update_flags = flags | READ_FOR_UPDATE;

	return vg_read(cmd, vg_name, vgid, update_flags);
}
/*
 * Test the validity of a VG handle returned by vg_read() or vg_read_for_update().
 *
 * Returns the handle's read_status, or FAILED_ALLOCATION for a NULL handle.
 */
uint32_t vg_read_error(struct volume_group *vg_handle)
{
	return vg_handle ? vg_handle->read_status : FAILED_ALLOCATION;
}
/*
 * Lock a vgname and/or check for existence.
 * Takes a WRITE lock on the vgname before scanning.
 * If scanning fails or vgname found, release the lock.
 * NOTE: If you find the return codes confusing, you might think of this
 * function as similar to an open() call with O_CREAT and O_EXCL flags
 * (open returns fail with -EEXIST if file already exists).
 *
 * Returns:
 * FAILED_LOCKING - Cannot lock name, or a further scan would be needed
 *                  while memlock() is true
 * FAILED_EXIST - VG name already exists - cannot reserve
 * SUCCESS - VG name does not exist in system and WRITE lock held
 */
uint32_t vg_lock_newname(struct cmd_context *cmd, const char *vgname)
{
	if (!lock_vol(cmd, vgname, LCK_VG_WRITE))
		return FAILED_LOCKING;

	/* Already in the cache? */
	if (fmt_from_vgname(vgname, NULL))
		goto exists;

	/* Not cached: scan with escalating arguments to be completely sure. */
	lvmcache_label_scan(cmd, 0);
	if (fmt_from_vgname(vgname, NULL))
		goto exists;

	if (memlock()) {
		/*
		 * FIXME: Disallow calling this function if
		 * memlock() is true.
		 */
		unlock_vg(cmd, vgname);
		return FAILED_LOCKING;
	}

	lvmcache_label_scan(cmd, 2);
	if (!fmt_from_vgname(vgname, NULL))
		/* vgname not found after scanning */
		return SUCCESS;

exists:
	/* Found vgname so cannot reserve. */
	unlock_vg(cmd, vgname);
	return FAILED_EXIST;
}
/*
 * Link mda into the format instance: onto metadata_areas_ignored when the
 * mda is flagged ignored, onto metadata_areas_in_use otherwise.
 */
void fid_add_mda(struct format_instance *fid, struct metadata_area *mda)
{
	struct dm_list *target;

	if (mda_is_ignored(mda))
		target = &fid->metadata_areas_ignored;
	else
		target = &fid->metadata_areas_in_use;

	dm_list_add(target, &mda->list);
}
/*
 * Copy every metadata area on 'mdas' (allocating from fid->fmt->cmd->mem)
 * and add each copy to the format instance via fid_add_mda().
 *
 * Returns 1 on success, 0 as soon as a copy fails.
 */
int fid_add_mdas(struct format_instance *fid, struct dm_list *mdas)
{
	struct metadata_area *cur;
	struct metadata_area *dup;

	dm_list_iterate_items(cur, mdas) {
		if (!(dup = mda_copy(fid->fmt->cmd->mem, cur)))
			return_0;

		fid_add_mda(fid, dup);
	}

	return 1;
}
/*
 * Copy constructor for a metadata_area.
 *
 * The new area is allocated from 'mem' and starts as a byte copy of 'mda'.
 * If the ops provide mda_metadata_locn_copy and the source has a
 * metadata_locn, that is copied through the op as well.  The copy's list
 * node is re-initialised, so it is not linked into any list.
 *
 * Returns the new area, or NULL on allocation failure.
 */
struct metadata_area *mda_copy(struct dm_pool *mem,
			       struct metadata_area *mda)
{
	struct metadata_area *dup = dm_pool_alloc(mem, sizeof(*dup));

	if (!dup) {
		log_error("metadata_area allocation failed");
		return NULL;
	}

	memcpy(dup, mda, sizeof(*mda));

	if (mda->ops->mda_metadata_locn_copy && mda->metadata_locn) {
		dup->metadata_locn =
		    mda->ops->mda_metadata_locn_copy(mem, mda->metadata_locn);

		if (!dup->metadata_locn) {
			dm_pool_free(mem, dup);
			return NULL;
		}
	}

	dm_list_init(&dup->list);

	return dup;
}
/*
 * This function provides a way to answer the question on a format-specific
 * basis - does the format-specific context of these two metadata areas
 * match?
 *
 * A metadata_area is defined to be independent of the underlying context.
 * This has the benefit that we can use the same abstraction to read disks
 * (see _metadata_text_raw_ops) or files (see _metadata_text_file_ops).
 * However, one downside is there is no format-independent way to determine
 * whether a given metadata_area is attached to a specific device - in fact,
 * it may not be attached to a device at all.
 *
 * Thus, LVM is structured such that an mda is not a member of struct
 * physical_volume.  The location of the mda depends on whether
 * the PV is in a volume group.  A PV not in a VG has an mda on the
 * 'info->mdas' list in lvmcache, while a PV in a VG has an mda on
 * the vg->fid->metadata_areas_in_use list.  For further details, see _vg_read(),
 * and the sequence of creating the format_instance with fid->metadata_areas_in_use
 * list, as well as the construction of the VG, with list of PVs (comes
 * after the construction of the fid and list of mdas).
 *
 * Returns 0 unless both areas provide the same, non-NULL mda_locns_match
 * operation; in that case returns that operation's result.
 */
unsigned mda_locns_match(struct metadata_area *mda1, struct metadata_area *mda2)
{
	if (mda1->ops->mda_locns_match &&
	    mda2->ops->mda_locns_match &&
	    mda1->ops->mda_locns_match == mda2->ops->mda_locns_match)
		return mda1->ops->mda_locns_match(mda1, mda2);

	return 0;
}
/*
 * Non-zero if MDA_IGNORED is set in the area's status, 0 otherwise.
 * The value is the masked status, not a normalised 0/1.
 */
unsigned mda_is_ignored(struct metadata_area *mda)
{
	return mda->status & MDA_IGNORED;
}
/*
 * Set (mda_ignored non-zero) or clear (mda_ignored zero) the MDA_IGNORED
 * flag on a metadata area.  When the flag already has the requested state
 * nothing is changed and nothing is logged.
 */
void mda_set_ignored(struct metadata_area *mda, unsigned mda_ignored)
{
	void *locn = mda->metadata_locn;
	unsigned was_ignored = mda_is_ignored(mda);

	/* Compare as booleans: both arguments may be any non-zero value. */
	if (!mda_ignored == !was_ignored)
		return;	/* No change */

	if (mda_ignored)
		mda->status |= MDA_IGNORED;
	else
		mda->status &= ~MDA_IGNORED;

	log_debug("%s ignored flag for mda %s at offset %" PRIu64 ".",
		  mda_ignored ? "Setting" : "Clearing",
		  mda->ops->mda_metadata_locn_name ? mda->ops->mda_metadata_locn_name(locn) : "",
		  mda->ops->mda_metadata_locn_offset ? mda->ops->mda_metadata_locn_offset(locn) : UINT64_C(0));
}
/*
 * Returns 1 if the list is empty or if at least one metadata area on it
 * is flagged ignored; returns 0 otherwise.
 */
int mdas_empty_or_ignored(struct dm_list *mdas)
{
	struct metadata_area *mda;
	int found = 0;

	if (!dm_list_size(mdas))
		return 1;

	dm_list_iterate_items(mda, mdas) {
		if (mda_is_ignored(mda)) {
			found = 1;
			break;
		}
	}

	return found;
}
/*
 * Mark all metadata areas on a PV as ignored (mda_ignored non-zero) or
 * in-use (mda_ignored zero).
 *
 * Fails, with an error logged, when the PV has no metadata areas at all or
 * when its areas are already in the requested state.
 *
 * Returns 1 on success, 0 on failure.
 */
int pv_change_metadataignore(struct physical_volume *pv, uint32_t mda_ignored)
{
	const char *pv_name = pv_dev_name(pv);

	/*
	 * This must be tested first: with no metadata areas the used count
	 * and the total count are both zero, so either "already ..." test
	 * below would match and report a misleading reason.
	 */
	if (!pv_mda_count(pv)) {
		log_error("Physical volume \"%s\" has no metadata "
			  "areas.", pv_name);
		return 0;
	}

	if (mda_ignored && !pv_mda_used_count(pv)) {
		log_error("Metadata areas on physical volume \"%s\" already "
			  "ignored.", pv_name);
		return 0;
	}

	if (!mda_ignored && (pv_mda_used_count(pv) == pv_mda_count(pv))) {
		log_error("Metadata areas on physical volume \"%s\" already "
			  "marked as in-use.", pv_name);
		return 0;
	}

	log_verbose("Marking metadata areas on physical volume \"%s\" "
		    "as %s.", pv_name, mda_ignored ? "ignored" : "in-use");

	if (!pv_mda_set_ignored(pv, mda_ignored))
		return_0;

	/*
	 * Update vg_mda_copies based on the mdas in this PV.
	 * This is most likely what the user would expect - if they
	 * specify a specific PV to be ignored/un-ignored, they will
	 * most likely not want LVM to turn around and change the
	 * ignore / un-ignore value when it writes the VG to disk.
	 * This does not guarantee this PV's ignore bits will be
	 * preserved in future operations.
	 */
	if (!is_orphan(pv) &&
	    vg_mda_copies(pv->vg) != VGMETADATACOPIES_UNMANAGED) {
		log_warn("WARNING: Changing preferred number of copies of VG %s "
			 "metadata from %"PRIu32" to %"PRIu32, pv_vg_name(pv),
			 vg_mda_copies(pv->vg), vg_mda_used_count(pv->vg));
		vg_set_mda_copies(pv->vg, vg_mda_used_count(pv->vg));
	}

	return 1;
}
/*
 * Map an allocation policy to its single-character code:
 * ALLOC_CONTIGUOUS 'c', ALLOC_CLING 'l', ALLOC_NORMAL 'n',
 * ALLOC_ANYWHERE 'a', and 'i' for any other value.
 */
char alloc_policy_char(alloc_policy_t alloc)
{
	char code = 'i';

	switch (alloc) {
	case ALLOC_CONTIGUOUS:
		code = 'c';
		break;
	case ALLOC_CLING:
		code = 'l';
		break;
	case ALLOC_NORMAL:
		code = 'n';
		break;
	case ALLOC_ANYWHERE:
		code = 'a';
		break;
	default:
		break;
	}

	return code;
}
/*
 * Build a NUL-terminated, comma-separated string from a list of tags.
 *
 * @mem  - pool the string is built in
 * @tags - list of struct str_list entries
 *
 * Returns the finished string (empty for an empty list), or NULL on
 * failure.  On failure the partially built pool object is abandoned so
 * that 'mem' is not left with an object still under construction.
 */
char *tags_format_and_copy(struct dm_pool *mem, const struct dm_list *tags)
{
	struct str_list *sl;

	if (!dm_pool_begin_object(mem, 256)) {
		log_error("dm_pool_begin_object failed");
		return NULL;
	}

	dm_list_iterate_items(sl, tags) {
		/* Append the tag, then a comma unless this is the last entry. */
		if (!dm_pool_grow_object(mem, sl->str, strlen(sl->str)) ||
		    (sl->list.n != tags && !dm_pool_grow_object(mem, ",", 1)))
			goto bad;
	}

	if (!dm_pool_grow_object(mem, "\0", 1))
		goto bad;

	return dm_pool_end_object(mem);

bad:
	log_error("dm_pool_grow_object failed");
	dm_pool_abandon_object(mem);
	return NULL;
}
/**
2007-07-12 19:38:53 +04:00
* pv_by_path - Given a device path return a PV handle if it is a PV
* @cmd - handle to the LVM command instance
* @pv_name - device path to read for the PV
*
* Returns:
* NULL - device path does not contain a valid PV
* non-NULL - PV handle corresponding to device path
*
2007-07-12 19:38:53 +04:00
* FIXME: merge with find_pv_by_name ?
*/
struct physical_volume *pv_by_path(struct cmd_context *cmd, const char *pv_name)
{
struct dm_list mdas;
dm_list_init(&mdas);
return _pv_read(cmd, cmd->mem, pv_name, &mdas, NULL, 1, 0);
}